/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
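/*
 * Worked example (illustrative): CTF_TEMP is read as a 9-bit value plus a
 * flag bit.  If bit 9 (0x200) is set the reading is clamped to 255 C;
 * otherwise a raw field value of, say, 42 yields 42 * 1000 = 42000
 * millidegrees.
 */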

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
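/*
 * Worked example (illustrative): the raw SMC reading is converted as
 * (temp / 8) - 49, so a raw value of 600 gives 600 / 8 - 49 = 26 C,
 * returned as 26000 millidegrees.
 */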

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

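/*
 * Both accessors above follow the classic index/data pattern: write the
 * register offset to PCIE_INDEX, read it back to post the write, then
 * read or write PCIE_DATA.  The spinlock keeps the index/data pair atomic
 * against concurrent accessors.  A minimal sketch of the same pattern for
 * a hypothetical indexed block (FOO_INDEX/FOO_DATA are illustrative, not
 * real registers):
 *
 *	spin_lock_irqsave(&lock, flags);
 *	WREG32(FOO_INDEX, reg);
 *	(void)RREG32(FOO_INDEX);	// flush the index write
 *	val = RREG32(FOO_DATA);
 *	spin_unlock_irqrestore(&lock, flags);
 */

/*
 * RLC save/restore lists: each register entry below appears to encode
 * (instance/broadcast bits << 16) | (register offset >> 2), followed by a
 * data word.  Bare words such as 0x3 or 0x5 seem to delimit sub-lists.
 * The exact interpretation is defined by the RLC microcode, so treat this
 * description as informational only.
 */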
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

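/*
 * "Golden" register tables: each entry is a triplet of
 * { register offset, and-mask, or-value } consumed by
 * radeon_program_register_sequence() at init time to put the hardware
 * into a known-good state.
 */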
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

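/*
 * For reference, radeon_program_register_sequence() (defined in
 * radeon_device.c) walks the triplet tables roughly as follows -- a
 * simplified sketch, not the verbatim implementation:
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff)
 *			tmp = or_mask;
 *		else
 *			tmp = (RREG32(reg) & ~and_mask) | or_mask;
 *		WREG32(reg, tmp);
 *	}
 */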
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
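/*
 * Example (illustrative): radeon clock values are typically kept in
 * 10 kHz units, so with a 100 MHz reference crystal reference_freq is
 * 10000, and a half-speed IGP counter clock makes this return 5000.
 */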

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
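/*
 * Typical usage (a sketch, not verbatim driver code): a compute or SDMA
 * ring's set_wptr hook bumps its write pointer and then rings the
 * doorbell so the hardware picks up the new work:
 *
 *	ring->wptr = next_wptr;
 *	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
 */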

#define BONAIRE_IO_MC_REGS_SIZE 36

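/*
 * MC "io" register tables: each row is an { index, data } pair written
 * through the MC sequencer's I/O-debug index/data registers around the
 * MC microcode upload in ci_mc_load_microcode() below.
 */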
1704 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1705 {
1706 	{0x00000070, 0x04400000},
1707 	{0x00000071, 0x80c01803},
1708 	{0x00000072, 0x00004004},
1709 	{0x00000073, 0x00000100},
1710 	{0x00000074, 0x00ff0000},
1711 	{0x00000075, 0x34000000},
1712 	{0x00000076, 0x08000014},
1713 	{0x00000077, 0x00cc08ec},
1714 	{0x00000078, 0x00000400},
1715 	{0x00000079, 0x00000000},
1716 	{0x0000007a, 0x04090000},
1717 	{0x0000007c, 0x00000000},
1718 	{0x0000007e, 0x4408a8e8},
1719 	{0x0000007f, 0x00000304},
1720 	{0x00000080, 0x00000000},
1721 	{0x00000082, 0x00000001},
1722 	{0x00000083, 0x00000002},
1723 	{0x00000084, 0xf3e4f400},
1724 	{0x00000085, 0x052024e3},
1725 	{0x00000087, 0x00000000},
1726 	{0x00000088, 0x01000000},
1727 	{0x0000008a, 0x1c0a0000},
1728 	{0x0000008b, 0xff010000},
1729 	{0x0000008d, 0xffffefff},
1730 	{0x0000008e, 0xfff3efff},
1731 	{0x0000008f, 0xfff3efbf},
1732 	{0x00000092, 0xf7ffffff},
1733 	{0x00000093, 0xffffff7f},
1734 	{0x00000095, 0x00101101},
1735 	{0x00000096, 0x00000fff},
1736 	{0x00000097, 0x00116fff},
1737 	{0x00000098, 0x60010000},
1738 	{0x00000099, 0x10010000},
1739 	{0x0000009a, 0x00006000},
1740 	{0x0000009b, 0x00001000},
1741 	{0x0000009f, 0x00b48000}
1742 };
1743 
1744 #define HAWAII_IO_MC_REGS_SIZE 22
1745 
1746 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1747 {
1748 	{0x0000007d, 0x40000000},
1749 	{0x0000007e, 0x40180304},
1750 	{0x0000007f, 0x0000ff00},
1751 	{0x00000081, 0x00000000},
1752 	{0x00000083, 0x00000800},
1753 	{0x00000086, 0x00000000},
1754 	{0x00000087, 0x00000100},
1755 	{0x00000088, 0x00020100},
1756 	{0x00000089, 0x00000000},
1757 	{0x0000008b, 0x00040000},
1758 	{0x0000008c, 0x00000100},
1759 	{0x0000008e, 0xff010000},
1760 	{0x00000090, 0xffffefff},
1761 	{0x00000091, 0xfff3efff},
1762 	{0x00000092, 0xfff3efbf},
1763 	{0x00000093, 0xf7ffffff},
1764 	{0x00000094, 0xffffff7f},
1765 	{0x00000095, 0x00000fff},
1766 	{0x00000096, 0x00116fff},
1767 	{0x00000097, 0x60010000},
1768 	{0x00000098, 0x10010000},
1769 	{0x0000009f, 0x00c79000}
1770 };
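
/*
 * Layout note (added for clarity): each row in the io_mc_regs tables
 * above is an {address, data} pair.  ci_mc_load_microcode() below writes
 * the first word to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA before streaming in the MC ucode proper.
 */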


/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
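
/*
 * Usage sketch (illustrative): callers are expected to hold
 * rdev->srbm_mutex across the selection and to restore the default
 * instance when they are done, e.g.
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */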

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
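
/*
 * Call-site sketch (illustrative): the MC ucode is loaded before the
 * memory controller is brought up during startup/resume, along the
 * lines of
 *
 *	r = ci_mc_load_microcode(rdev);
 *	if (r)
 *		DRM_ERROR("Failed to load MC firmware!\n");
 *
 * with the real call living in the CIK startup path elsewhere in this
 * file.
 */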

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
	char fw_name[30];
	int new_fw = 0;
	int err;
	int num_fw;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		new_chip_name = "bonaire";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		new_chip_name = "hawaii";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		new_chip_name = "kaveri";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 7;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		new_chip_name = "kabini";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	case CHIP_MULLINS:
		chip_name = "MULLINS";
		new_chip_name = "mullins";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->me_fw->size != me_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mec_fw->size != mec_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->mec_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	if (rdev->family == CHIP_KAVERI) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err) {
			goto out;
		} else {
			err = radeon_ucode_validate(rdev->mec2_fw);
			if (err) {
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			printk(KERN_ERR
			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->sdma_fw->size != sdma_req_size) {
			printk(KERN_ERR
			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->sdma_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err) {
				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
				if (err)
					goto out;
			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)) {
				printk(KERN_ERR
				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
				goto out;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
		} else {
			err = radeon_ucode_validate(rdev->mc_fw);
			if (err) {
				printk(KERN_ERR
				       "cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
			if (err) {
				printk(KERN_ERR
				       "smc: error loading firmware \"%s\"\n",
				       fw_name);
				release_firmware(rdev->smc_fw);
				rdev->smc_fw = NULL;
				err = 0;
			} else if (rdev->smc_fw->size != smc_req_size) {
				printk(KERN_ERR
				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->smc_fw->size, fw_name);
				err = -EINVAL;
			}
		} else {
			err = radeon_ucode_validate(rdev->smc_fw);
			if (err) {
				printk(KERN_ERR
				       "cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < num_fw) {
		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->mec2_fw);
		rdev->mec2_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
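
/*
 * Note on the fallback scheme above: for each block the driver first
 * requests the newer lowercase image (e.g. "radeon/bonaire_pfp.bin"),
 * which carries a header that radeon_ucode_validate() can check, and
 * only then falls back to the legacy uppercase image (e.g.
 * "radeon/BONAIRE_pfp.bin"), a bare blob that can only be checked by
 * size.  Mixing the two generations is rejected via the new_fw/num_fw
 * accounting.
 */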

/*
 * Core functions
 */
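
/*
 * Illustrative example: with the global table programmed below, a
 * surface no longer carries its own bank/pipe/split parameters; it just
 * stores a tile mode index N, and the hardware looks the parameters up
 * in GB_TILE_MODE0 + N * 4 (or GB_MACROTILE_MODE0 + N * 4 for the
 * secondary table).
 */
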
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 16;

	if (num_pipe_configs == 16) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 2) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						PIPE_CONFIG(ADDR_SURF_P2);
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
3229 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3230 			switch (reg_offset) {
3231 			case 0:
3232 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3234 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235 						 NUM_BANKS(ADDR_SURF_16_BANK));
3236 				break;
3237 			case 1:
3238 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3239 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3240 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 						 NUM_BANKS(ADDR_SURF_16_BANK));
3242 				break;
3243 			case 2:
3244 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3246 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247 						 NUM_BANKS(ADDR_SURF_16_BANK));
3248 				break;
3249 			case 3:
3250 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3252 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253 						 NUM_BANKS(ADDR_SURF_16_BANK));
3254 				break;
3255 			case 4:
3256 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3258 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 						 NUM_BANKS(ADDR_SURF_16_BANK));
3260 				break;
3261 			case 5:
3262 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3264 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265 						 NUM_BANKS(ADDR_SURF_16_BANK));
3266 				break;
3267 			case 6:
3268 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3270 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3271 						 NUM_BANKS(ADDR_SURF_8_BANK));
3272 				break;
3273 			case 8:
3274 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3275 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3276 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277 						 NUM_BANKS(ADDR_SURF_16_BANK));
3278 				break;
3279 			case 9:
3280 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3281 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3282 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3283 						 NUM_BANKS(ADDR_SURF_16_BANK));
3284 				break;
3285 			case 10:
3286 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3287 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3288 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3289 						 NUM_BANKS(ADDR_SURF_16_BANK));
3290 				break;
3291 			case 11:
3292 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3293 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3294 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295 						 NUM_BANKS(ADDR_SURF_16_BANK));
3296 				break;
3297 			case 12:
3298 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3299 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3300 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3301 						 NUM_BANKS(ADDR_SURF_16_BANK));
3302 				break;
3303 			case 13:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 14:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3313 						 NUM_BANKS(ADDR_SURF_8_BANK));
3314 				break;
3315 			default:
3316 				gb_tile_moden = 0;
3317 				break;
3318 			}
3319 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3320 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3321 		}
3322 	} else
3323 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3324 }
3325 
3326 /**
3327  * cik_select_se_sh - select which SE, SH to address
3328  *
3329  * @rdev: radeon_device pointer
3330  * @se_num: shader engine to address
3331  * @sh_num: sh block to address
3332  *
3333  * Select which SE, SH combinations to address. Certain
3334  * registers are instanced per SE or SH.  0xffffffff means
3335  * broadcast to all SEs or SHs (CIK).
3336  */
3337 static void cik_select_se_sh(struct radeon_device *rdev,
3338 			     u32 se_num, u32 sh_num)
3339 {
3340 	u32 data = INSTANCE_BROADCAST_WRITES;
3341 
3342 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3343 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3344 	else if (se_num == 0xffffffff)
3345 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3346 	else if (sh_num == 0xffffffff)
3347 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3348 	else
3349 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3350 	WREG32(GRBM_GFX_INDEX, data);
3351 }
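/*
 * Illustrative usage (mirroring cik_setup_rb below): select one SE/SH
 * pair before touching an instanced register, then restore broadcast
 * mode so later writes reach every instance:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */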
3352 
3353 /**
3354  * cik_create_bitmask - create a bitmask
3355  *
3356  * @bit_width: length of the mask
3357  *
3358  * Create a variable length bit mask (CIK).
3359  * Returns the bitmask.
3360  */
3361 static u32 cik_create_bitmask(u32 bit_width)
3362 {
3363 	u32 i, mask = 0;
3364 
3365 	for (i = 0; i < bit_width; i++) {
3366 		mask <<= 1;
3367 		mask |= 1;
3368 	}
3369 	return mask;
3370 }
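/*
 * For bit_width < 32 this is equivalent to ((1u << bit_width) - 1),
 * e.g. cik_create_bitmask(4) == 0xf.
 */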
3371 
3372 /**
3373  * cik_get_rb_disabled - computes the mask of disabled RBs
3374  *
3375  * @rdev: radeon_device pointer
3376  * @max_rb_num_per_se: max RBs (render backends) per SE
3377  *                     for the asic
3378  * @sh_per_se: number of SH blocks per SE for the asic
3379  *
3380  * Calculates the bitmask of disabled RBs (CIK).
3381  * Returns the disabled RB bitmask.
3382  */
3383 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3384 			      u32 max_rb_num_per_se,
3385 			      u32 sh_per_se)
3386 {
3387 	u32 data, mask;
3388 
3389 	data = RREG32(CC_RB_BACKEND_DISABLE);
3390 	if (data & 1)
3391 		data &= BACKEND_DISABLE_MASK;
3392 	else
3393 		data = 0;
3394 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3395 
3396 	data >>= BACKEND_DISABLE_SHIFT;
3397 
3398 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3399 
3400 	return data & mask;
3401 }
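/*
 * Worked example: with max_rb_num_per_se = 2 and sh_per_se = 1 (the
 * Bonaire configuration set up in cik_gpu_init()), the mask is
 * cik_create_bitmask(2) == 0x3, i.e. only the two RB-disable bits of
 * the currently selected SE/SH are returned.
 */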
3402 
3403 /**
3404  * cik_setup_rb - setup the RBs on the asic
3405  *
3406  * @rdev: radeon_device pointer
3407  * @se_num: number of SEs (shader engines) for the asic
3408  * @sh_per_se: number of SH blocks per SE for the asic
3409  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3410  *
3411  * Configures per-SE/SH RB registers (CIK).
3412  */
3413 static void cik_setup_rb(struct radeon_device *rdev,
3414 			 u32 se_num, u32 sh_per_se,
3415 			 u32 max_rb_num_per_se)
3416 {
3417 	int i, j;
3418 	u32 data, mask;
3419 	u32 disabled_rbs = 0;
3420 	u32 enabled_rbs = 0;
3421 
3422 	for (i = 0; i < se_num; i++) {
3423 		for (j = 0; j < sh_per_se; j++) {
3424 			cik_select_se_sh(rdev, i, j);
3425 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3426 			if (rdev->family == CHIP_HAWAII)
3427 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3428 			else
3429 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3430 		}
3431 	}
3432 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3433 
3434 	mask = 1;
3435 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3436 		if (!(disabled_rbs & mask))
3437 			enabled_rbs |= mask;
3438 		mask <<= 1;
3439 	}
3440 
3441 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3442 
3443 	for (i = 0; i < se_num; i++) {
3444 		cik_select_se_sh(rdev, i, 0xffffffff);
3445 		data = 0;
3446 		for (j = 0; j < sh_per_se; j++) {
3447 			switch (enabled_rbs & 3) {
3448 			case 0:
3449 				if (j == 0)
3450 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3451 				else
3452 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3453 				break;
3454 			case 1:
3455 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3456 				break;
3457 			case 2:
3458 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3459 				break;
3460 			case 3:
3461 			default:
3462 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3463 				break;
3464 			}
3465 			enabled_rbs >>= 2;
3466 		}
3467 		WREG32(PA_SC_RASTER_CONFIG, data);
3468 	}
3469 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3470 }
3471 
3472 /**
3473  * cik_gpu_init - setup the 3D engine
3474  *
3475  * @rdev: radeon_device pointer
3476  *
3477  * Configures the 3D engine and tiling configuration
3478  * registers so that the 3D engine is usable.
3479  */
3480 static void cik_gpu_init(struct radeon_device *rdev)
3481 {
3482 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3483 	u32 mc_shared_chmap, mc_arb_ramcfg;
3484 	u32 hdp_host_path_cntl;
3485 	u32 tmp;
3486 	int i, j;
3487 
3488 	switch (rdev->family) {
3489 	case CHIP_BONAIRE:
3490 		rdev->config.cik.max_shader_engines = 2;
3491 		rdev->config.cik.max_tile_pipes = 4;
3492 		rdev->config.cik.max_cu_per_sh = 7;
3493 		rdev->config.cik.max_sh_per_se = 1;
3494 		rdev->config.cik.max_backends_per_se = 2;
3495 		rdev->config.cik.max_texture_channel_caches = 4;
3496 		rdev->config.cik.max_gprs = 256;
3497 		rdev->config.cik.max_gs_threads = 32;
3498 		rdev->config.cik.max_hw_contexts = 8;
3499 
3500 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3501 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3502 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3503 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3504 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3505 		break;
3506 	case CHIP_HAWAII:
3507 		rdev->config.cik.max_shader_engines = 4;
3508 		rdev->config.cik.max_tile_pipes = 16;
3509 		rdev->config.cik.max_cu_per_sh = 11;
3510 		rdev->config.cik.max_sh_per_se = 1;
3511 		rdev->config.cik.max_backends_per_se = 4;
3512 		rdev->config.cik.max_texture_channel_caches = 16;
3513 		rdev->config.cik.max_gprs = 256;
3514 		rdev->config.cik.max_gs_threads = 32;
3515 		rdev->config.cik.max_hw_contexts = 8;
3516 
3517 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3518 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3519 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3520 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3521 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3522 		break;
3523 	case CHIP_KAVERI:
3524 		rdev->config.cik.max_shader_engines = 1;
3525 		rdev->config.cik.max_tile_pipes = 4;
3526 		if ((rdev->pdev->device == 0x1304) ||
3527 		    (rdev->pdev->device == 0x1305) ||
3528 		    (rdev->pdev->device == 0x130C) ||
3529 		    (rdev->pdev->device == 0x130F) ||
3530 		    (rdev->pdev->device == 0x1310) ||
3531 		    (rdev->pdev->device == 0x1311) ||
3532 		    (rdev->pdev->device == 0x131C)) {
3533 			rdev->config.cik.max_cu_per_sh = 8;
3534 			rdev->config.cik.max_backends_per_se = 2;
3535 		} else if ((rdev->pdev->device == 0x1309) ||
3536 			   (rdev->pdev->device == 0x130A) ||
3537 			   (rdev->pdev->device == 0x130D) ||
3538 			   (rdev->pdev->device == 0x1313) ||
3539 			   (rdev->pdev->device == 0x131D)) {
3540 			rdev->config.cik.max_cu_per_sh = 6;
3541 			rdev->config.cik.max_backends_per_se = 2;
3542 		} else if ((rdev->pdev->device == 0x1306) ||
3543 			   (rdev->pdev->device == 0x1307) ||
3544 			   (rdev->pdev->device == 0x130B) ||
3545 			   (rdev->pdev->device == 0x130E) ||
3546 			   (rdev->pdev->device == 0x1315) ||
3547 			   (rdev->pdev->device == 0x1318) ||
3548 			   (rdev->pdev->device == 0x131B)) {
3549 			rdev->config.cik.max_cu_per_sh = 4;
3550 			rdev->config.cik.max_backends_per_se = 1;
3551 		} else {
3552 			rdev->config.cik.max_cu_per_sh = 3;
3553 			rdev->config.cik.max_backends_per_se = 1;
3554 		}
3555 		rdev->config.cik.max_sh_per_se = 1;
3556 		rdev->config.cik.max_texture_channel_caches = 4;
3557 		rdev->config.cik.max_gprs = 256;
3558 		rdev->config.cik.max_gs_threads = 16;
3559 		rdev->config.cik.max_hw_contexts = 8;
3560 
3561 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3562 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3563 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3564 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3565 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3566 		break;
3567 	case CHIP_KABINI:
3568 	case CHIP_MULLINS:
3569 	default:
3570 		rdev->config.cik.max_shader_engines = 1;
3571 		rdev->config.cik.max_tile_pipes = 2;
3572 		rdev->config.cik.max_cu_per_sh = 2;
3573 		rdev->config.cik.max_sh_per_se = 1;
3574 		rdev->config.cik.max_backends_per_se = 1;
3575 		rdev->config.cik.max_texture_channel_caches = 2;
3576 		rdev->config.cik.max_gprs = 256;
3577 		rdev->config.cik.max_gs_threads = 16;
3578 		rdev->config.cik.max_hw_contexts = 8;
3579 
3580 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3581 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3582 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3583 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3584 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3585 		break;
3586 	}
3587 
3588 	/* Initialize HDP */
3589 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3590 		WREG32((0x2c14 + j), 0x00000000);
3591 		WREG32((0x2c18 + j), 0x00000000);
3592 		WREG32((0x2c1c + j), 0x00000000);
3593 		WREG32((0x2c20 + j), 0x00000000);
3594 		WREG32((0x2c24 + j), 0x00000000);
3595 	}
3596 
3597 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3598 
3599 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3600 
3601 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3602 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3603 
3604 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3605 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3606 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3607 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3608 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3609 		rdev->config.cik.mem_row_size_in_kb = 4;
3610 	/* XXX use MC settings? */
3611 	rdev->config.cik.shader_engine_tile_size = 32;
3612 	rdev->config.cik.num_gpus = 1;
3613 	rdev->config.cik.multi_gpu_tile_size = 64;
3614 
3615 	/* fix up row size */
3616 	gb_addr_config &= ~ROW_SIZE_MASK;
3617 	switch (rdev->config.cik.mem_row_size_in_kb) {
3618 	case 1:
3619 	default:
3620 		gb_addr_config |= ROW_SIZE(0);
3621 		break;
3622 	case 2:
3623 		gb_addr_config |= ROW_SIZE(1);
3624 		break;
3625 	case 4:
3626 		gb_addr_config |= ROW_SIZE(2);
3627 		break;
3628 	}
3629 
3630 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3631 	 * not have bank info, so create a custom tiling dword.
3632 	 * bits 3:0   num_pipes
3633 	 * bits 7:4   num_banks
3634 	 * bits 11:8  group_size
3635 	 * bits 15:12 row_size
3636 	 */
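	/*
	 * Example (illustrative): a 4-pipe part like Bonaire stores 2 in
	 * bits 3:0 (see the switch below); the bank, group size and row
	 * size fields come from the RAMCFG/ADDR_CONFIG values read above.
	 */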
3637 	rdev->config.cik.tile_config = 0;
3638 	switch (rdev->config.cik.num_tile_pipes) {
3639 	case 1:
3640 		rdev->config.cik.tile_config |= (0 << 0);
3641 		break;
3642 	case 2:
3643 		rdev->config.cik.tile_config |= (1 << 0);
3644 		break;
3645 	case 4:
3646 		rdev->config.cik.tile_config |= (2 << 0);
3647 		break;
3648 	case 8:
3649 	default:
3650 		/* XXX what about 12? */
3651 		rdev->config.cik.tile_config |= (3 << 0);
3652 		break;
3653 	}
3654 	rdev->config.cik.tile_config |=
3655 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3656 	rdev->config.cik.tile_config |=
3657 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3658 	rdev->config.cik.tile_config |=
3659 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3660 
3661 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3662 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3663 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3664 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3665 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3666 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3667 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3668 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3669 
3670 	cik_tiling_mode_table_init(rdev);
3671 
3672 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3673 		     rdev->config.cik.max_sh_per_se,
3674 		     rdev->config.cik.max_backends_per_se);
3675 
3676 	rdev->config.cik.active_cus = 0;
3677 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3678 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3679 			rdev->config.cik.active_cus +=
3680 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3681 		}
3682 	}
3683 
3684 	/* set HW defaults for 3D engine */
3685 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3686 
3687 	WREG32(SX_DEBUG_1, 0x20);
3688 
3689 	WREG32(TA_CNTL_AUX, 0x00010000);
3690 
3691 	tmp = RREG32(SPI_CONFIG_CNTL);
3692 	tmp |= 0x03000000;
3693 	WREG32(SPI_CONFIG_CNTL, tmp);
3694 
3695 	WREG32(SQ_CONFIG, 1);
3696 
3697 	WREG32(DB_DEBUG, 0);
3698 
3699 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3700 	tmp |= 0x00000400;
3701 	WREG32(DB_DEBUG2, tmp);
3702 
3703 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3704 	tmp |= 0x00020200;
3705 	WREG32(DB_DEBUG3, tmp);
3706 
3707 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3708 	tmp |= 0x00018208;
3709 	WREG32(CB_HW_CONTROL, tmp);
3710 
3711 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3712 
3713 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3714 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3715 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3716 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3717 
3718 	WREG32(VGT_NUM_INSTANCES, 1);
3719 
3720 	WREG32(CP_PERFMON_CNTL, 0);
3721 
3722 	WREG32(SQ_CONFIG, 0);
3723 
3724 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3725 					  FORCE_EOV_MAX_REZ_CNT(255)));
3726 
3727 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3728 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3729 
3730 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3731 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3732 
3733 	tmp = RREG32(HDP_MISC_CNTL);
3734 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3735 	WREG32(HDP_MISC_CNTL, tmp);
3736 
3737 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3738 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3739 
3740 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3741 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3742 
3743 	udelay(50);
3744 }
3745 
3746 /*
3747  * GPU scratch register helper functions.
3748  */
3749 /**
3750  * cik_scratch_init - setup driver info for CP scratch regs
3751  *
3752  * @rdev: radeon_device pointer
3753  *
3754  * Set up the number and offset of the CP scratch registers.
3755  * NOTE: use of CP scratch registers is a legacy interface and
3756  * is not used by default on newer asics (r6xx+).  On newer asics,
3757  * memory buffers are used for fences rather than scratch regs.
3758  */
3759 static void cik_scratch_init(struct radeon_device *rdev)
3760 {
3761 	int i;
3762 
3763 	rdev->scratch.num_reg = 7;
3764 	rdev->scratch.reg_base = SCRATCH_REG0;
3765 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3766 		rdev->scratch.free[i] = true;
3767 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3768 	}
3769 }
3770 
3771 /**
3772  * cik_ring_test - basic gfx ring test
3773  *
3774  * @rdev: radeon_device pointer
3775  * @ring: radeon_ring structure holding ring information
3776  *
3777  * Allocate a scratch register and write to it using the gfx ring (CIK).
3778  * Provides a basic gfx ring test to verify that the ring is working.
3779  * Used by cik_cp_gfx_resume().
3780  * Returns 0 on success, error on failure.
3781  */
3782 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3783 {
3784 	uint32_t scratch;
3785 	uint32_t tmp = 0;
3786 	unsigned i;
3787 	int r;
3788 
3789 	r = radeon_scratch_get(rdev, &scratch);
3790 	if (r) {
3791 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3792 		return r;
3793 	}
3794 	WREG32(scratch, 0xCAFEDEAD);
3795 	r = radeon_ring_lock(rdev, ring, 3);
3796 	if (r) {
3797 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3798 		radeon_scratch_free(rdev, scratch);
3799 		return r;
3800 	}
3801 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3802 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3803 	radeon_ring_write(ring, 0xDEADBEEF);
3804 	radeon_ring_unlock_commit(rdev, ring, false);
3805 
3806 	for (i = 0; i < rdev->usec_timeout; i++) {
3807 		tmp = RREG32(scratch);
3808 		if (tmp == 0xDEADBEEF)
3809 			break;
3810 		DRM_UDELAY(1);
3811 	}
3812 	if (i < rdev->usec_timeout) {
3813 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3814 	} else {
3815 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3816 			  ring->idx, scratch, tmp);
3817 		r = -EINVAL;
3818 	}
3819 	radeon_scratch_free(rdev, scratch);
3820 	return r;
3821 }
3822 
3823 /**
3824  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3825  *
3826  * @rdev: radeon_device pointer
3827  * @ridx: radeon ring index
3828  *
3829  * Emits an hdp flush on the cp.
3830  */
3831 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3832 				       int ridx)
3833 {
3834 	struct radeon_ring *ring = &rdev->ring[ridx];
3835 	u32 ref_and_mask;
3836 
3837 	switch (ring->idx) {
3838 	case CAYMAN_RING_TYPE_CP1_INDEX:
3839 	case CAYMAN_RING_TYPE_CP2_INDEX:
3840 	default:
3841 		switch (ring->me) {
3842 		case 0:
3843 			ref_and_mask = CP2 << ring->pipe;
3844 			break;
3845 		case 1:
3846 			ref_and_mask = CP6 << ring->pipe;
3847 			break;
3848 		default:
3849 			return;
3850 		}
3851 		break;
3852 	case RADEON_RING_TYPE_GFX_INDEX:
3853 		ref_and_mask = CP0;
3854 		break;
3855 	}
3856 
3857 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3858 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3859 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3860 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3861 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3862 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3863 	radeon_ring_write(ring, ref_and_mask);
3864 	radeon_ring_write(ring, ref_and_mask);
3865 	radeon_ring_write(ring, 0x20); /* poll interval */
3866 }
3867 
3868 /**
3869  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3870  *
3871  * @rdev: radeon_device pointer
3872  * @fence: radeon fence object
3873  *
3874  * Emits a fence sequence number on the gfx ring and flushes
3875  * GPU caches.
3876  */
3877 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3878 			     struct radeon_fence *fence)
3879 {
3880 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3881 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3882 
3883 	/* Workaround for cache flush problems: first send a dummy EOP
3884 	 * event down the pipe with a seq value one below the real one.
3885 	 */
3886 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3887 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3888 				 EOP_TC_ACTION_EN |
3889 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3890 				 EVENT_INDEX(5)));
3891 	radeon_ring_write(ring, addr & 0xfffffffc);
3892 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3893 				DATA_SEL(1) | INT_SEL(0));
3894 	radeon_ring_write(ring, fence->seq - 1);
3895 	radeon_ring_write(ring, 0);
3896 
3897 	/* Then send the real EOP event down the pipe. */
3898 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3899 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3900 				 EOP_TC_ACTION_EN |
3901 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3902 				 EVENT_INDEX(5)));
3903 	radeon_ring_write(ring, addr & 0xfffffffc);
3904 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3905 	radeon_ring_write(ring, fence->seq);
3906 	radeon_ring_write(ring, 0);
3907 }
3908 
3909 /**
3910  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3911  *
3912  * @rdev: radeon_device pointer
3913  * @fence: radeon fence object
3914  *
3915  * Emits a fence sequence number on the compute ring and flushes
3916  * GPU caches.
3917  */
3918 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3919 				 struct radeon_fence *fence)
3920 {
3921 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3922 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3923 
3924 	/* RELEASE_MEM - flush caches, send int */
3925 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3926 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3927 				 EOP_TC_ACTION_EN |
3928 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3929 				 EVENT_INDEX(5)));
3930 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3931 	radeon_ring_write(ring, addr & 0xfffffffc);
3932 	radeon_ring_write(ring, upper_32_bits(addr));
3933 	radeon_ring_write(ring, fence->seq);
3934 	radeon_ring_write(ring, 0);
3935 }
3936 
3937 /**
3938  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3939  *
3940  * @rdev: radeon_device pointer
3941  * @ring: radeon ring buffer object
3942  * @semaphore: radeon semaphore object
3943  * @emit_wait: Is this a semaphore wait?
3944  *
3945  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3946  * from running ahead of semaphore waits.
3947  */
3948 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3949 			     struct radeon_ring *ring,
3950 			     struct radeon_semaphore *semaphore,
3951 			     bool emit_wait)
3952 {
3953 	uint64_t addr = semaphore->gpu_addr;
3954 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3955 
3956 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3957 	radeon_ring_write(ring, lower_32_bits(addr));
3958 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3959 
3960 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3961 		/* Prevent the PFP from running ahead of the semaphore wait */
3962 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3963 		radeon_ring_write(ring, 0x0);
3964 	}
3965 
3966 	return true;
3967 }
3968 
3969 /**
3970  * cik_copy_cpdma - copy pages using the CP DMA engine
3971  *
3972  * @rdev: radeon_device pointer
3973  * @src_offset: src GPU address
3974  * @dst_offset: dst GPU address
3975  * @num_gpu_pages: number of GPU pages to xfer
3976  * @resv: reservation object to sync to
3977  *
3978  * Copy GPU pages using the CP DMA engine (CIK+).
3979  * Used by the radeon ttm implementation to move pages if
3980  * registered as the asic copy callback.
3981  */
3982 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3983 				    uint64_t src_offset, uint64_t dst_offset,
3984 				    unsigned num_gpu_pages,
3985 				    struct reservation_object *resv)
3986 {
3987 	struct radeon_semaphore *sem = NULL;
3988 	struct radeon_fence *fence;
3989 	int ring_index = rdev->asic->copy.blit_ring_index;
3990 	struct radeon_ring *ring = &rdev->ring[ring_index];
3991 	u32 size_in_bytes, cur_size_in_bytes, control;
3992 	int i, num_loops;
3993 	int r = 0;
3994 
3995 	r = radeon_semaphore_create(rdev, &sem);
3996 	if (r) {
3997 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3998 		return ERR_PTR(r);
3999 	}
4000 
4001 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4002 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
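	/*
	 * Each loop below emits one 7-dword DMA_DATA packet (header,
	 * control, 64-bit src, 64-bit dst, byte count), and one packet
	 * moves at most 0x1fffff bytes; the extra 18 dwords are headroom
	 * for the semaphore sync and fence packets emitted around the copy.
	 */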
4003 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4004 	if (r) {
4005 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4006 		radeon_semaphore_free(rdev, &sem, NULL);
4007 		return ERR_PTR(r);
4008 	}
4009 
4010 	radeon_semaphore_sync_resv(rdev, sem, resv, false);
4011 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
4012 
4013 	for (i = 0; i < num_loops; i++) {
4014 		cur_size_in_bytes = size_in_bytes;
4015 		if (cur_size_in_bytes > 0x1fffff)
4016 			cur_size_in_bytes = 0x1fffff;
4017 		size_in_bytes -= cur_size_in_bytes;
4018 		control = 0;
4019 		if (size_in_bytes == 0)
4020 			control |= PACKET3_DMA_DATA_CP_SYNC;
4021 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4022 		radeon_ring_write(ring, control);
4023 		radeon_ring_write(ring, lower_32_bits(src_offset));
4024 		radeon_ring_write(ring, upper_32_bits(src_offset));
4025 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4026 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4027 		radeon_ring_write(ring, cur_size_in_bytes);
4028 		src_offset += cur_size_in_bytes;
4029 		dst_offset += cur_size_in_bytes;
4030 	}
4031 
4032 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4033 	if (r) {
4034 		radeon_ring_unlock_undo(rdev, ring);
4035 		radeon_semaphore_free(rdev, &sem, NULL);
4036 		return ERR_PTR(r);
4037 	}
4038 
4039 	radeon_ring_unlock_commit(rdev, ring, false);
4040 	radeon_semaphore_free(rdev, &sem, fence);
4041 
4042 	return fence;
4043 }
4044 
4045 /*
4046  * IB stuff
4047  */
4048 /**
4049  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4050  *
4051  * @rdev: radeon_device pointer
4052  * @ib: radeon indirect buffer object
4053  *
4054  * Emits a DE (drawing engine) or CE (constant engine) IB
4055  * on the gfx ring.  IBs are usually generated by userspace
4056  * acceleration drivers and submitted to the kernel for
4057  * scheduling on the ring.  This function schedules the IB
4058  * on the gfx ring for execution by the GPU.
4059  */
4060 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4061 {
4062 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4063 	u32 header, control = INDIRECT_BUFFER_VALID;
4064 
4065 	if (ib->is_const_ib) {
4066 		/* set switch buffer packet before const IB */
4067 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4068 		radeon_ring_write(ring, 0);
4069 
4070 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4071 	} else {
4072 		u32 next_rptr;
4073 		if (ring->rptr_save_reg) {
4074 			next_rptr = ring->wptr + 3 + 4;
4075 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4076 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4077 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4078 			radeon_ring_write(ring, next_rptr);
4079 		} else if (rdev->wb.enabled) {
4080 			next_rptr = ring->wptr + 5 + 4;
4081 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4082 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4083 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4084 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4085 			radeon_ring_write(ring, next_rptr);
4086 		}
4087 
4088 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4089 	}
4090 
4091 	control |= ib->length_dw |
4092 		(ib->vm ? (ib->vm->id << 24) : 0);
4093 
4094 	radeon_ring_write(ring, header);
4095 	radeon_ring_write(ring,
4096 #ifdef __BIG_ENDIAN
4097 			  (2 << 0) |
4098 #endif
4099 			  (ib->gpu_addr & 0xFFFFFFFC));
4100 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4101 	radeon_ring_write(ring, control);
4102 }
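/*
 * The emitted packet is therefore: header (INDIRECT_BUFFER or
 * INDIRECT_BUFFER_CONST), the 64-bit IB address (low dword 4-byte
 * aligned), then a control word combining the IB length in dwords
 * with the VMID shifted into bits 24 and up.
 */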
4103 
4104 /**
4105  * cik_ib_test - basic gfx ring IB test
4106  *
4107  * @rdev: radeon_device pointer
4108  * @ring: radeon_ring structure holding ring information
4109  *
4110  * Allocate an IB and execute it on the gfx ring (CIK).
4111  * Provides a basic gfx ring test to verify that IBs are working.
4112  * Returns 0 on success, error on failure.
4113  */
4114 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4115 {
4116 	struct radeon_ib ib;
4117 	uint32_t scratch;
4118 	uint32_t tmp = 0;
4119 	unsigned i;
4120 	int r;
4121 
4122 	r = radeon_scratch_get(rdev, &scratch);
4123 	if (r) {
4124 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4125 		return r;
4126 	}
4127 	WREG32(scratch, 0xCAFEDEAD);
4128 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4129 	if (r) {
4130 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4131 		radeon_scratch_free(rdev, scratch);
4132 		return r;
4133 	}
4134 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4135 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4136 	ib.ptr[2] = 0xDEADBEEF;
4137 	ib.length_dw = 3;
4138 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4139 	if (r) {
4140 		radeon_scratch_free(rdev, scratch);
4141 		radeon_ib_free(rdev, &ib);
4142 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4143 		return r;
4144 	}
4145 	r = radeon_fence_wait(ib.fence, false);
4146 	if (r) {
4147 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4148 		radeon_scratch_free(rdev, scratch);
4149 		radeon_ib_free(rdev, &ib);
4150 		return r;
4151 	}
4152 	for (i = 0; i < rdev->usec_timeout; i++) {
4153 		tmp = RREG32(scratch);
4154 		if (tmp == 0xDEADBEEF)
4155 			break;
4156 		DRM_UDELAY(1);
4157 	}
4158 	if (i < rdev->usec_timeout) {
4159 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4160 	} else {
4161 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4162 			  scratch, tmp);
4163 		r = -EINVAL;
4164 	}
4165 	radeon_scratch_free(rdev, scratch);
4166 	radeon_ib_free(rdev, &ib);
4167 	return r;
4168 }
4169 
4170 /*
4171  * CP.
4172  * On CIK, gfx and compute now have independent command processors.
4173  *
4174  * GFX
4175  * Gfx consists of a single ring and can process both gfx jobs and
4176  * compute jobs.  The gfx CP consists of three microengines (ME):
4177  * PFP - Pre-Fetch Parser
4178  * ME - Micro Engine
4179  * CE - Constant Engine
4180  * The PFP and ME make up what is considered the Drawing Engine (DE).
4181  * The CE is an asynchronous engine used for updating buffer descriptors
4182  * used by the DE so that they can be loaded into cache in parallel
4183  * while the DE is processing state update packets.
4184  *
4185  * Compute
4186  * The compute CP consists of two microengines (ME):
4187  * MEC1 - Compute MicroEngine 1
4188  * MEC2 - Compute MicroEngine 2
4189  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4190  * The queues are exposed to userspace and are programmed directly
4191  * by the compute runtime.
4192  */
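/*
 * Queue math from the figures above: Kaveri (2 MECs) exposes
 * 2 * 4 pipes * 8 queues = 64 compute queues, while single-MEC parts
 * expose 32 (see cik_mec_init() below).
 */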
4193 /**
4194  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4195  *
4196  * @rdev: radeon_device pointer
4197  * @enable: enable or disable the MEs
4198  *
4199  * Halts or unhalts the gfx MEs.
4200  */
4201 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4202 {
4203 	if (enable)
4204 		WREG32(CP_ME_CNTL, 0);
4205 	else {
4206 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4207 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4208 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4209 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4210 	}
4211 	udelay(50);
4212 }
4213 
4214 /**
4215  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4216  *
4217  * @rdev: radeon_device pointer
4218  *
4219  * Loads the gfx PFP, ME, and CE ucode.
4220  * Returns 0 for success, -EINVAL if the ucode is not available.
4221  */
4222 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4223 {
4224 	int i;
4225 
4226 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4227 		return -EINVAL;
4228 
4229 	cik_cp_gfx_enable(rdev, false);
4230 
4231 	if (rdev->new_fw) {
4232 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4233 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4234 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4235 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4236 		const struct gfx_firmware_header_v1_0 *me_hdr =
4237 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4238 		const __le32 *fw_data;
4239 		u32 fw_size;
4240 
4241 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4242 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4243 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4244 
4245 		/* PFP */
4246 		fw_data = (const __le32 *)
4247 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4248 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4249 		WREG32(CP_PFP_UCODE_ADDR, 0);
4250 		for (i = 0; i < fw_size; i++)
4251 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4252 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4253 
4254 		/* CE */
4255 		fw_data = (const __le32 *)
4256 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4257 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4258 		WREG32(CP_CE_UCODE_ADDR, 0);
4259 		for (i = 0; i < fw_size; i++)
4260 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4261 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4262 
4263 		/* ME */
4264 		fw_data = (const __le32 *)
4265 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4266 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4267 		WREG32(CP_ME_RAM_WADDR, 0);
4268 		for (i = 0; i < fw_size; i++)
4269 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4270 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4271 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4272 	} else {
4273 		const __be32 *fw_data;
4274 
4275 		/* PFP */
4276 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4277 		WREG32(CP_PFP_UCODE_ADDR, 0);
4278 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4279 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4280 		WREG32(CP_PFP_UCODE_ADDR, 0);
4281 
4282 		/* CE */
4283 		fw_data = (const __be32 *)rdev->ce_fw->data;
4284 		WREG32(CP_CE_UCODE_ADDR, 0);
4285 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4286 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4287 		WREG32(CP_CE_UCODE_ADDR, 0);
4288 
4289 		/* ME */
4290 		fw_data = (const __be32 *)rdev->me_fw->data;
4291 		WREG32(CP_ME_RAM_WADDR, 0);
4292 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4293 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4294 		WREG32(CP_ME_RAM_WADDR, 0);
4295 	}
4296 
4297 	return 0;
4298 }
4299 
4300 /**
4301  * cik_cp_gfx_start - start the gfx ring
4302  *
4303  * @rdev: radeon_device pointer
4304  *
4305  * Enables the ring and loads the clear state context and other
4306  * packets required to init the ring.
4307  * Returns 0 for success, error for failure.
4308  */
4309 static int cik_cp_gfx_start(struct radeon_device *rdev)
4310 {
4311 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4312 	int r, i;
4313 
4314 	/* init the CP */
4315 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4316 	WREG32(CP_ENDIAN_SWAP, 0);
4317 	WREG32(CP_DEVICE_ID, 1);
4318 
4319 	cik_cp_gfx_enable(rdev, true);
4320 
4321 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4322 	if (r) {
4323 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4324 		return r;
4325 	}
4326 
4327 	/* init the CE partitions.  CE only used for gfx on CIK */
4328 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4329 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4330 	radeon_ring_write(ring, 0x8000);
4331 	radeon_ring_write(ring, 0x8000);
4332 
4333 	/* setup clear context state */
4334 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4335 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4336 
4337 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4338 	radeon_ring_write(ring, 0x80000000);
4339 	radeon_ring_write(ring, 0x80000000);
4340 
4341 	for (i = 0; i < cik_default_size; i++)
4342 		radeon_ring_write(ring, cik_default_state[i]);
4343 
4344 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4345 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4346 
4347 	/* set clear context state */
4348 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4349 	radeon_ring_write(ring, 0);
4350 
4351 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4352 	radeon_ring_write(ring, 0x00000316);
4353 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4354 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4355 
4356 	radeon_ring_unlock_commit(rdev, ring, false);
4357 
4358 	return 0;
4359 }
4360 
4361 /**
4362  * cik_cp_gfx_fini - stop the gfx ring
4363  *
4364  * @rdev: radeon_device pointer
4365  *
4366  * Stop the gfx ring and tear down the driver ring
4367  * info.
4368  */
4369 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4370 {
4371 	cik_cp_gfx_enable(rdev, false);
4372 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4373 }
4374 
4375 /**
4376  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4377  *
4378  * @rdev: radeon_device pointer
4379  *
4380  * Program the location and size of the gfx ring buffer
4381  * and test it to make sure it's working.
4382  * Returns 0 for success, error for failure.
4383  */
4384 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4385 {
4386 	struct radeon_ring *ring;
4387 	u32 tmp;
4388 	u32 rb_bufsz;
4389 	u64 rb_addr;
4390 	int r;
4391 
4392 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4393 	if (rdev->family != CHIP_HAWAII)
4394 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4395 
4396 	/* Set the write pointer delay */
4397 	WREG32(CP_RB_WPTR_DELAY, 0);
4398 
4399 	/* set the RB to use vmid 0 */
4400 	WREG32(CP_RB_VMID, 0);
4401 
4402 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4403 
4404 	/* ring 0 - compute and gfx */
4405 	/* Set ring buffer size */
4406 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4407 	rb_bufsz = order_base_2(ring->ring_size / 8);
4408 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4409 #ifdef __BIG_ENDIAN
4410 	tmp |= BUF_SWAP_32BIT;
4411 #endif
4412 	WREG32(CP_RB0_CNTL, tmp);
4413 
4414 	/* Initialize the ring buffer's read and write pointers */
4415 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4416 	ring->wptr = 0;
4417 	WREG32(CP_RB0_WPTR, ring->wptr);
4418 
4419 	/* set the wb address whether it's enabled or not */
4420 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4421 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4422 
4423 	/* scratch register shadowing is no longer supported */
4424 	WREG32(SCRATCH_UMSK, 0);
4425 
4426 	if (!rdev->wb.enabled)
4427 		tmp |= RB_NO_UPDATE;
4428 
4429 	mdelay(1);
4430 	WREG32(CP_RB0_CNTL, tmp);
4431 
4432 	rb_addr = ring->gpu_addr >> 8;
4433 	WREG32(CP_RB0_BASE, rb_addr);
4434 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4435 
4436 	/* start the ring */
4437 	cik_cp_gfx_start(rdev);
4438 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4439 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4440 	if (r) {
4441 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4442 		return r;
4443 	}
4444 
4445 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4446 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4447 
4448 	return 0;
4449 }
4450 
4451 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4452 		     struct radeon_ring *ring)
4453 {
4454 	u32 rptr;
4455 
4456 	if (rdev->wb.enabled)
4457 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4458 	else
4459 		rptr = RREG32(CP_RB0_RPTR);
4460 
4461 	return rptr;
4462 }
4463 
4464 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4465 		     struct radeon_ring *ring)
4466 {
4467 	u32 wptr;
4468 
4469 	wptr = RREG32(CP_RB0_WPTR);
4470 
4471 	return wptr;
4472 }
4473 
4474 void cik_gfx_set_wptr(struct radeon_device *rdev,
4475 		      struct radeon_ring *ring)
4476 {
4477 	WREG32(CP_RB0_WPTR, ring->wptr);
4478 	(void)RREG32(CP_RB0_WPTR);
4479 }
4480 
4481 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4482 			 struct radeon_ring *ring)
4483 {
4484 	u32 rptr;
4485 
4486 	if (rdev->wb.enabled) {
4487 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4488 	} else {
4489 		mutex_lock(&rdev->srbm_mutex);
4490 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4491 		rptr = RREG32(CP_HQD_PQ_RPTR);
4492 		cik_srbm_select(rdev, 0, 0, 0, 0);
4493 		mutex_unlock(&rdev->srbm_mutex);
4494 	}
4495 
4496 	return rptr;
4497 }
4498 
4499 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4500 			 struct radeon_ring *ring)
4501 {
4502 	u32 wptr;
4503 
4504 	if (rdev->wb.enabled) {
4505 		/* XXX check if swapping is necessary on BE */
4506 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4507 	} else {
4508 		mutex_lock(&rdev->srbm_mutex);
4509 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4510 		wptr = RREG32(CP_HQD_PQ_WPTR);
4511 		cik_srbm_select(rdev, 0, 0, 0, 0);
4512 		mutex_unlock(&rdev->srbm_mutex);
4513 	}
4514 
4515 	return wptr;
4516 }
4517 
4518 void cik_compute_set_wptr(struct radeon_device *rdev,
4519 			  struct radeon_ring *ring)
4520 {
4521 	/* XXX check if swapping is necessary on BE */
4522 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4523 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4524 }
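/*
 * Note: unlike the gfx ring, compute queues are kicked via their
 * doorbell; the new wptr is mirrored into the writeback page and the
 * doorbell write tells the CP to fetch it.
 */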
4525 
4526 static void cik_compute_stop(struct radeon_device *rdev,
4527 			     struct radeon_ring *ring)
4528 {
4529 	u32 j, tmp;
4530 
4531 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4532 	/* Disable wptr polling. */
4533 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4534 	tmp &= ~WPTR_POLL_EN;
4535 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4536 	/* Disable HQD. */
4537 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4538 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4539 		for (j = 0; j < rdev->usec_timeout; j++) {
4540 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4541 				break;
4542 			udelay(1);
4543 		}
4544 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4545 		WREG32(CP_HQD_PQ_RPTR, 0);
4546 		WREG32(CP_HQD_PQ_WPTR, 0);
4547 	}
4548 	cik_srbm_select(rdev, 0, 0, 0, 0);
4549 }
4550 
4551 /**
4552  * cik_cp_compute_enable - enable/disable the compute CP MEs
4553  *
4554  * @rdev: radeon_device pointer
4555  * @enable: enable or disable the MEs
4556  *
4557  * Halts or unhalts the compute MEs.
4558  */
4559 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4560 {
4561 	if (enable)
4562 		WREG32(CP_MEC_CNTL, 0);
4563 	else {
4564 		/*
4565 		 * To make hibernation reliable we need to clear compute ring
4566 		 * configuration before halting the compute ring.
4567 		 */
4568 		mutex_lock(&rdev->srbm_mutex);
4569 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4570 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4571 		mutex_unlock(&rdev->srbm_mutex);
4572 
4573 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4574 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4575 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4576 	}
4577 	udelay(50);
4578 }
4579 
4580 /**
4581  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4582  *
4583  * @rdev: radeon_device pointer
4584  *
4585  * Loads the compute MEC1&2 ucode.
4586  * Returns 0 for success, -EINVAL if the ucode is not available.
4587  */
4588 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4589 {
4590 	int i;
4591 
4592 	if (!rdev->mec_fw)
4593 		return -EINVAL;
4594 
4595 	cik_cp_compute_enable(rdev, false);
4596 
4597 	if (rdev->new_fw) {
4598 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4599 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4600 		const __le32 *fw_data;
4601 		u32 fw_size;
4602 
4603 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4604 
4605 		/* MEC1 */
4606 		fw_data = (const __le32 *)
4607 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4608 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4609 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4610 		for (i = 0; i < fw_size; i++)
4611 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4612 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4613 
4614 		/* MEC2 */
4615 		if (rdev->family == CHIP_KAVERI) {
4616 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4617 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4618 
4619 			fw_data = (const __le32 *)
4620 				(rdev->mec2_fw->data +
4621 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4622 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4623 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4624 			for (i = 0; i < fw_size; i++)
4625 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4626 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4627 		}
4628 	} else {
4629 		const __be32 *fw_data;
4630 
4631 		/* MEC1 */
4632 		fw_data = (const __be32 *)rdev->mec_fw->data;
4633 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4634 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4635 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4636 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4637 
4638 		if (rdev->family == CHIP_KAVERI) {
4639 			/* MEC2 */
4640 			fw_data = (const __be32 *)rdev->mec_fw->data;
4641 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4642 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4643 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4644 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4645 		}
4646 	}
4647 
4648 	return 0;
4649 }
4650 
4651 /**
4652  * cik_cp_compute_start - start the compute queues
4653  *
4654  * @rdev: radeon_device pointer
4655  *
4656  * Enable the compute queues.
4657  * Returns 0 for success, error for failure.
4658  */
4659 static int cik_cp_compute_start(struct radeon_device *rdev)
4660 {
4661 	cik_cp_compute_enable(rdev, true);
4662 
4663 	return 0;
4664 }
4665 
4666 /**
4667  * cik_cp_compute_fini - stop the compute queues
4668  *
4669  * @rdev: radeon_device pointer
4670  *
4671  * Stop the compute queues and tear down the driver queue
4672  * info.
4673  */
4674 static void cik_cp_compute_fini(struct radeon_device *rdev)
4675 {
4676 	int i, idx, r;
4677 
4678 	cik_cp_compute_enable(rdev, false);
4679 
4680 	for (i = 0; i < 2; i++) {
4681 		if (i == 0)
4682 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4683 		else
4684 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4685 
4686 		if (rdev->ring[idx].mqd_obj) {
4687 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4688 			if (unlikely(r != 0))
4689 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4690 
4691 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4692 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4693 
4694 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4695 			rdev->ring[idx].mqd_obj = NULL;
4696 		}
4697 	}
4698 }
4699 
4700 static void cik_mec_fini(struct radeon_device *rdev)
4701 {
4702 	int r;
4703 
4704 	if (rdev->mec.hpd_eop_obj) {
4705 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4706 		if (unlikely(r != 0))
4707 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4708 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4709 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4710 
4711 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4712 		rdev->mec.hpd_eop_obj = NULL;
4713 	}
4714 }
4715 
4716 #define MEC_HPD_SIZE 2048
4717 
4718 static int cik_mec_init(struct radeon_device *rdev)
4719 {
4720 	int r;
4721 	u32 *hpd;
4722 
4723 	/*
4724 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4725 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4726 	 */
4727 	if (rdev->family == CHIP_KAVERI)
4728 		rdev->mec.num_mec = 2;
4729 	else
4730 		rdev->mec.num_mec = 1;
4731 	rdev->mec.num_pipe = 4;
4732 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4733 
4734 	if (rdev->mec.hpd_eop_obj == NULL) {
4735 		r = radeon_bo_create(rdev,
4736 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4737 				     PAGE_SIZE, true,
4738 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4739 				     &rdev->mec.hpd_eop_obj);
4740 		if (r) {
4741 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4742 			return r;
4743 		}
4744 	}
4745 
4746 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4747 	if (unlikely(r != 0)) {
4748 		cik_mec_fini(rdev);
4749 		return r;
4750 	}
4751 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4752 			  &rdev->mec.hpd_eop_gpu_addr);
4753 	if (r) {
4754 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4755 		cik_mec_fini(rdev);
4756 		return r;
4757 	}
4758 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4759 	if (r) {
4760 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4761 		cik_mec_fini(rdev);
4762 		return r;
4763 	}
4764 
4765 	/* clear memory.  Not sure if this is required or not */
4766 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4767 
4768 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4769 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4770 
4771 	return 0;
4772 }
4773 
4774 struct hqd_registers
4775 {
4776 	u32 cp_mqd_base_addr;
4777 	u32 cp_mqd_base_addr_hi;
4778 	u32 cp_hqd_active;
4779 	u32 cp_hqd_vmid;
4780 	u32 cp_hqd_persistent_state;
4781 	u32 cp_hqd_pipe_priority;
4782 	u32 cp_hqd_queue_priority;
4783 	u32 cp_hqd_quantum;
4784 	u32 cp_hqd_pq_base;
4785 	u32 cp_hqd_pq_base_hi;
4786 	u32 cp_hqd_pq_rptr;
4787 	u32 cp_hqd_pq_rptr_report_addr;
4788 	u32 cp_hqd_pq_rptr_report_addr_hi;
4789 	u32 cp_hqd_pq_wptr_poll_addr;
4790 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4791 	u32 cp_hqd_pq_doorbell_control;
4792 	u32 cp_hqd_pq_wptr;
4793 	u32 cp_hqd_pq_control;
4794 	u32 cp_hqd_ib_base_addr;
4795 	u32 cp_hqd_ib_base_addr_hi;
4796 	u32 cp_hqd_ib_rptr;
4797 	u32 cp_hqd_ib_control;
4798 	u32 cp_hqd_iq_timer;
4799 	u32 cp_hqd_iq_rptr;
4800 	u32 cp_hqd_dequeue_request;
4801 	u32 cp_hqd_dma_offload;
4802 	u32 cp_hqd_sema_cmd;
4803 	u32 cp_hqd_msg_type;
4804 	u32 cp_hqd_atomic0_preop_lo;
4805 	u32 cp_hqd_atomic0_preop_hi;
4806 	u32 cp_hqd_atomic1_preop_lo;
4807 	u32 cp_hqd_atomic1_preop_hi;
4808 	u32 cp_hqd_hq_scheduler0;
4809 	u32 cp_hqd_hq_scheduler1;
4810 	u32 cp_mqd_control;
4811 };
4812 
4813 struct bonaire_mqd
4814 {
4815 	u32 header;
4816 	u32 dispatch_initiator;
4817 	u32 dimensions[3];
4818 	u32 start_idx[3];
4819 	u32 num_threads[3];
4820 	u32 pipeline_stat_enable;
4821 	u32 perf_counter_enable;
4822 	u32 pgm[2];
4823 	u32 tba[2];
4824 	u32 tma[2];
4825 	u32 pgm_rsrc[2];
4826 	u32 vmid;
4827 	u32 resource_limits;
4828 	u32 static_thread_mgmt01[2];
4829 	u32 tmp_ring_size;
4830 	u32 static_thread_mgmt23[2];
4831 	u32 restart[3];
4832 	u32 thread_trace_enable;
4833 	u32 reserved1;
4834 	u32 user_data[16];
4835 	u32 vgtcs_invoke_count[2];
4836 	struct hqd_registers queue_state;
4837 	u32 dequeue_cntr;
4838 	u32 interrupt_queue[64];
4839 };
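/*
 * The MQD (memory queue descriptor) is the CP's in-memory image of a
 * compute queue; its embedded hqd_registers block mirrors the CP_HQD_*
 * (hardware queue descriptor) registers programmed in
 * cik_cp_compute_resume() below.
 */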
4840 
4841 /**
4842  * cik_cp_compute_resume - setup the compute queue registers
4843  *
4844  * @rdev: radeon_device pointer
4845  *
4846  * Program the compute queues and test them to make sure they
4847  * are working.
4848  * Returns 0 for success, error for failure.
4849  */
4850 static int cik_cp_compute_resume(struct radeon_device *rdev)
4851 {
4852 	int r, i, j, idx;
4853 	u32 tmp;
4854 	bool use_doorbell = true;
4855 	u64 hqd_gpu_addr;
4856 	u64 mqd_gpu_addr;
4857 	u64 eop_gpu_addr;
4858 	u64 wb_gpu_addr;
4859 	u32 *buf;
4860 	struct bonaire_mqd *mqd;
4861 
4862 	r = cik_cp_compute_start(rdev);
4863 	if (r)
4864 		return r;
4865 
4866 	/* fix up chicken bits */
4867 	tmp = RREG32(CP_CPF_DEBUG);
4868 	tmp |= (1 << 23);
4869 	WREG32(CP_CPF_DEBUG, tmp);
4870 
4871 	/* init the pipes */
4872 	mutex_lock(&rdev->srbm_mutex);
4873 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4874 		int me = (i < 4) ? 1 : 2;
4875 		int pipe = (i < 4) ? i : (i - 4);
4876 
4877 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4878 
4879 		cik_srbm_select(rdev, me, pipe, 0, 0);
4880 
4881 		/* write the EOP addr */
4882 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4883 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4884 
4885 		/* set the VMID assigned */
4886 		WREG32(CP_HPD_EOP_VMID, 0);
4887 
4888 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4889 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4890 		tmp &= ~EOP_SIZE_MASK;
4891 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4892 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4893 	}
4894 	cik_srbm_select(rdev, 0, 0, 0, 0);
4895 	mutex_unlock(&rdev->srbm_mutex);
4896 
4897 	/* init the queues.  Just two for now. */
4898 	for (i = 0; i < 2; i++) {
4899 		if (i == 0)
4900 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4901 		else
4902 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4903 
4904 		if (rdev->ring[idx].mqd_obj == NULL) {
4905 			r = radeon_bo_create(rdev,
4906 					     sizeof(struct bonaire_mqd),
4907 					     PAGE_SIZE, true,
4908 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4909 					     NULL, &rdev->ring[idx].mqd_obj);
4910 			if (r) {
4911 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4912 				return r;
4913 			}
4914 		}
4915 
4916 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4917 		if (unlikely(r != 0)) {
4918 			cik_cp_compute_fini(rdev);
4919 			return r;
4920 		}
4921 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4922 				  &mqd_gpu_addr);
4923 		if (r) {
4924 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4925 			cik_cp_compute_fini(rdev);
4926 			return r;
4927 		}
4928 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4929 		if (r) {
4930 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4931 			cik_cp_compute_fini(rdev);
4932 			return r;
4933 		}
4934 
4935 		/* init the mqd struct */
4936 		memset(buf, 0, sizeof(struct bonaire_mqd));
4937 
4938 		mqd = (struct bonaire_mqd *)buf;
4939 		mqd->header = 0xC0310800;
4940 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4941 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4942 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4943 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4944 
4945 		mutex_lock(&rdev->srbm_mutex);
4946 		cik_srbm_select(rdev, rdev->ring[idx].me,
4947 				rdev->ring[idx].pipe,
4948 				rdev->ring[idx].queue, 0);
4949 
4950 		/* disable wptr polling */
4951 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4952 		tmp &= ~WPTR_POLL_EN;
4953 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4954 
4955 		/* enable doorbell? */
4956 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4957 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4958 		if (use_doorbell)
4959 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4960 		else
4961 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4962 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4963 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4964 
4965 		/* disable the queue if it's active */
4966 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4967 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4968 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4969 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4970 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4971 			for (j = 0; j < rdev->usec_timeout; j++) {
4972 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4973 					break;
4974 				udelay(1);
4975 			}
4976 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4977 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4978 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4979 		}
4980 
4981 		/* set the pointer to the MQD */
4982 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4983 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4984 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4985 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4986 		/* set MQD vmid to 0 */
4987 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4988 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4989 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4990 
4991 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4992 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4993 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4994 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4995 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4996 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4997 
4998 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4999 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5000 		mqd->queue_state.cp_hqd_pq_control &=
5001 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5002 
5003 		mqd->queue_state.cp_hqd_pq_control |=
5004 			order_base_2(rdev->ring[idx].ring_size / 8);
5005 		mqd->queue_state.cp_hqd_pq_control |=
5006 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5007 #ifdef __BIG_ENDIAN
5008 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5009 #endif
5010 		mqd->queue_state.cp_hqd_pq_control &=
5011 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5012 		mqd->queue_state.cp_hqd_pq_control |=
5013 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5014 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5015 
5016 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5017 		if (i == 0)
5018 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5019 		else
5020 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5021 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5022 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5023 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5024 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5025 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5026 
5027 		/* set the wb address whether it's enabled or not */
5028 		if (i == 0)
5029 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5030 		else
5031 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5032 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5033 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5034 			upper_32_bits(wb_gpu_addr) & 0xffff;
5035 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5036 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5037 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5038 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5039 
5040 		/* enable the doorbell if requested */
5041 		if (use_doorbell) {
5042 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5043 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5044 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5045 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5046 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5047 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5048 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5049 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5050 
5051 		} else {
5052 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5053 		}
5054 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5055 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5056 
5057 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5058 		rdev->ring[idx].wptr = 0;
5059 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5060 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5061 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5062 
5063 		/* set the vmid for the queue */
5064 		mqd->queue_state.cp_hqd_vmid = 0;
5065 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5066 
5067 		/* activate the queue */
5068 		mqd->queue_state.cp_hqd_active = 1;
5069 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5070 
5071 		cik_srbm_select(rdev, 0, 0, 0, 0);
5072 		mutex_unlock(&rdev->srbm_mutex);
5073 
5074 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5075 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5076 
5077 		rdev->ring[idx].ready = true;
5078 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5079 		if (r)
5080 			rdev->ring[idx].ready = false;
5081 	}
5082 
5083 	return 0;
5084 }
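/*
 * Once DOORBELL_EN is set above, ring submission no longer touches the
 * SRBM-selected CP_HQD_PQ_WPTR register.  Bumping the wptr is roughly
 * (illustrative; see cik_compute_set_wptr() earlier in this file):
 *
 *	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
 *	WDOORBELL32(ring->doorbell_index, ring->wptr);
 *
 * i.e. a write-back copy for the CPU plus a doorbell write that the CP
 * snoops to pick up new work.
 */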
5085 
5086 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5087 {
5088 	cik_cp_gfx_enable(rdev, enable);
5089 	cik_cp_compute_enable(rdev, enable);
5090 }
5091 
5092 static int cik_cp_load_microcode(struct radeon_device *rdev)
5093 {
5094 	int r;
5095 
5096 	r = cik_cp_gfx_load_microcode(rdev);
5097 	if (r)
5098 		return r;
5099 	r = cik_cp_compute_load_microcode(rdev);
5100 	if (r)
5101 		return r;
5102 
5103 	return 0;
5104 }
5105 
5106 static void cik_cp_fini(struct radeon_device *rdev)
5107 {
5108 	cik_cp_gfx_fini(rdev);
5109 	cik_cp_compute_fini(rdev);
5110 }
5111 
5112 static int cik_cp_resume(struct radeon_device *rdev)
5113 {
5114 	int r;
5115 
5116 	cik_enable_gui_idle_interrupt(rdev, false);
5117 
5118 	r = cik_cp_load_microcode(rdev);
5119 	if (r)
5120 		return r;
5121 
5122 	r = cik_cp_gfx_resume(rdev);
5123 	if (r)
5124 		return r;
5125 	r = cik_cp_compute_resume(rdev);
5126 	if (r)
5127 		return r;
5128 
5129 	cik_enable_gui_idle_interrupt(rdev, true);
5130 
5131 	return 0;
5132 }
5133 
5134 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5135 {
5136 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5137 		RREG32(GRBM_STATUS));
5138 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5139 		RREG32(GRBM_STATUS2));
5140 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5141 		RREG32(GRBM_STATUS_SE0));
5142 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5143 		RREG32(GRBM_STATUS_SE1));
5144 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5145 		RREG32(GRBM_STATUS_SE2));
5146 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5147 		RREG32(GRBM_STATUS_SE3));
5148 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5149 		RREG32(SRBM_STATUS));
5150 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5151 		RREG32(SRBM_STATUS2));
5152 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5153 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5154 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5155 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5156 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5157 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5158 		 RREG32(CP_STALLED_STAT1));
5159 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5160 		 RREG32(CP_STALLED_STAT2));
5161 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5162 		 RREG32(CP_STALLED_STAT3));
5163 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5164 		 RREG32(CP_CPF_BUSY_STAT));
5165 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5166 		 RREG32(CP_CPF_STALLED_STAT1));
5167 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5168 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5169 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5170 		 RREG32(CP_CPC_STALLED_STAT1));
5171 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5172 }
5173 
5174 /**
5175  * cik_gpu_check_soft_reset - check which blocks are busy
5176  *
5177  * @rdev: radeon_device pointer
5178  *
5179  * Check which blocks are busy and return the relevant reset
5180  * mask to be used by cik_gpu_soft_reset().
5181  * Returns a mask of the blocks to be reset.
5182  */
5183 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5184 {
5185 	u32 reset_mask = 0;
5186 	u32 tmp;
5187 
5188 	/* GRBM_STATUS */
5189 	tmp = RREG32(GRBM_STATUS);
5190 	if (tmp & (PA_BUSY | SC_BUSY |
5191 		   BCI_BUSY | SX_BUSY |
5192 		   TA_BUSY | VGT_BUSY |
5193 		   DB_BUSY | CB_BUSY |
5194 		   GDS_BUSY | SPI_BUSY |
5195 		   IA_BUSY | IA_BUSY_NO_DMA))
5196 		reset_mask |= RADEON_RESET_GFX;
5197 
5198 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5199 		reset_mask |= RADEON_RESET_CP;
5200 
5201 	/* GRBM_STATUS2 */
5202 	tmp = RREG32(GRBM_STATUS2);
5203 	if (tmp & RLC_BUSY)
5204 		reset_mask |= RADEON_RESET_RLC;
5205 
5206 	/* SDMA0_STATUS_REG */
5207 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5208 	if (!(tmp & SDMA_IDLE))
5209 		reset_mask |= RADEON_RESET_DMA;
5210 
5211 	/* SDMA1_STATUS_REG */
5212 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5213 	if (!(tmp & SDMA_IDLE))
5214 		reset_mask |= RADEON_RESET_DMA1;
5215 
5216 	/* SRBM_STATUS2 */
5217 	tmp = RREG32(SRBM_STATUS2);
5218 	if (tmp & SDMA_BUSY)
5219 		reset_mask |= RADEON_RESET_DMA;
5220 
5221 	if (tmp & SDMA1_BUSY)
5222 		reset_mask |= RADEON_RESET_DMA1;
5223 
5224 	/* SRBM_STATUS */
5225 	tmp = RREG32(SRBM_STATUS);
5226 
5227 	if (tmp & IH_BUSY)
5228 		reset_mask |= RADEON_RESET_IH;
5229 
5230 	if (tmp & SEM_BUSY)
5231 		reset_mask |= RADEON_RESET_SEM;
5232 
5233 	if (tmp & GRBM_RQ_PENDING)
5234 		reset_mask |= RADEON_RESET_GRBM;
5235 
5236 	if (tmp & VMC_BUSY)
5237 		reset_mask |= RADEON_RESET_VMC;
5238 
5239 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5240 		   MCC_BUSY | MCD_BUSY))
5241 		reset_mask |= RADEON_RESET_MC;
5242 
5243 	if (evergreen_is_display_hung(rdev))
5244 		reset_mask |= RADEON_RESET_DISPLAY;
5245 
5246 	/* Skip MC reset as it's most likely not hung, just busy */
5247 	if (reset_mask & RADEON_RESET_MC) {
5248 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5249 		reset_mask &= ~RADEON_RESET_MC;
5250 	}
5251 
5252 	return reset_mask;
5253 }
5254 
5255 /**
5256  * cik_gpu_soft_reset - soft reset GPU
5257  *
5258  * @rdev: radeon_device pointer
5259  * @reset_mask: mask of which blocks to reset
5260  *
5261  * Soft reset the blocks specified in @reset_mask.
5262  */
5263 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5264 {
5265 	struct evergreen_mc_save save;
5266 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5267 	u32 tmp;
5268 
5269 	if (reset_mask == 0)
5270 		return;
5271 
5272 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5273 
5274 	cik_print_gpu_status_regs(rdev);
5275 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5276 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5277 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5278 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5279 
5280 	/* disable CG/PG */
5281 	cik_fini_pg(rdev);
5282 	cik_fini_cg(rdev);
5283 
5284 	/* stop the rlc */
5285 	cik_rlc_stop(rdev);
5286 
5287 	/* Disable GFX parsing/prefetching */
5288 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5289 
5290 	/* Disable MEC parsing/prefetching */
5291 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5292 
5293 	if (reset_mask & RADEON_RESET_DMA) {
5294 		/* sdma0 */
5295 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5296 		tmp |= SDMA_HALT;
5297 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5298 	}
5299 	if (reset_mask & RADEON_RESET_DMA1) {
5300 		/* sdma1 */
5301 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5302 		tmp |= SDMA_HALT;
5303 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5304 	}
5305 
5306 	evergreen_mc_stop(rdev, &save);
5307 	if (evergreen_mc_wait_for_idle(rdev)) {
5308 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5309 	}
5310 
5311 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5312 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5313 
5314 	if (reset_mask & RADEON_RESET_CP) {
5315 		grbm_soft_reset |= SOFT_RESET_CP;
5316 
5317 		srbm_soft_reset |= SOFT_RESET_GRBM;
5318 	}
5319 
5320 	if (reset_mask & RADEON_RESET_DMA)
5321 		srbm_soft_reset |= SOFT_RESET_SDMA;
5322 
5323 	if (reset_mask & RADEON_RESET_DMA1)
5324 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5325 
5326 	if (reset_mask & RADEON_RESET_DISPLAY)
5327 		srbm_soft_reset |= SOFT_RESET_DC;
5328 
5329 	if (reset_mask & RADEON_RESET_RLC)
5330 		grbm_soft_reset |= SOFT_RESET_RLC;
5331 
5332 	if (reset_mask & RADEON_RESET_SEM)
5333 		srbm_soft_reset |= SOFT_RESET_SEM;
5334 
5335 	if (reset_mask & RADEON_RESET_IH)
5336 		srbm_soft_reset |= SOFT_RESET_IH;
5337 
5338 	if (reset_mask & RADEON_RESET_GRBM)
5339 		srbm_soft_reset |= SOFT_RESET_GRBM;
5340 
5341 	if (reset_mask & RADEON_RESET_VMC)
5342 		srbm_soft_reset |= SOFT_RESET_VMC;
5343 
5344 	if (!(rdev->flags & RADEON_IS_IGP)) {
5345 		if (reset_mask & RADEON_RESET_MC)
5346 			srbm_soft_reset |= SOFT_RESET_MC;
5347 	}
5348 
5349 	if (grbm_soft_reset) {
5350 		tmp = RREG32(GRBM_SOFT_RESET);
5351 		tmp |= grbm_soft_reset;
5352 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5353 		WREG32(GRBM_SOFT_RESET, tmp);
5354 		tmp = RREG32(GRBM_SOFT_RESET);
5355 
5356 		udelay(50);
5357 
5358 		tmp &= ~grbm_soft_reset;
5359 		WREG32(GRBM_SOFT_RESET, tmp);
5360 		tmp = RREG32(GRBM_SOFT_RESET);
5361 	}
5362 
5363 	if (srbm_soft_reset) {
5364 		tmp = RREG32(SRBM_SOFT_RESET);
5365 		tmp |= srbm_soft_reset;
5366 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5367 		WREG32(SRBM_SOFT_RESET, tmp);
5368 		tmp = RREG32(SRBM_SOFT_RESET);
5369 
5370 		udelay(50);
5371 
5372 		tmp &= ~srbm_soft_reset;
5373 		WREG32(SRBM_SOFT_RESET, tmp);
5374 		tmp = RREG32(SRBM_SOFT_RESET);
5375 	}
5376 
5377 	/* Wait a little for things to settle down */
5378 	udelay(50);
5379 
5380 	evergreen_mc_resume(rdev, &save);
5381 	udelay(50);
5382 
5383 	cik_print_gpu_status_regs(rdev);
5384 }
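/*
 * Both reset pulses above follow the same pattern: OR the reset bits
 * in, read the register back to post the write, hold for ~50us, clear
 * the bits, and read back again.  The dummy reads are deliberate;
 * without them the set and clear could be collapsed or reordered on
 * the bus and the pulse would be too short for the blocks to latch.
 */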
5385 
5386 struct kv_reset_save_regs {
5387 	u32 gmcon_reng_execute;
5388 	u32 gmcon_misc;
5389 	u32 gmcon_misc3;
5390 };
5391 
5392 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5393 				   struct kv_reset_save_regs *save)
5394 {
5395 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5396 	save->gmcon_misc = RREG32(GMCON_MISC);
5397 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5398 
5399 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5400 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5401 						STCTRL_STUTTER_EN));
5402 }
5403 
5404 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5405 				      struct kv_reset_save_regs *save)
5406 {
5407 	int i;
5408 
5409 	WREG32(GMCON_PGFSM_WRITE, 0);
5410 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5411 
5412 	for (i = 0; i < 5; i++)
5413 		WREG32(GMCON_PGFSM_WRITE, 0);
5414 
5415 	WREG32(GMCON_PGFSM_WRITE, 0);
5416 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5417 
5418 	for (i = 0; i < 5; i++)
5419 		WREG32(GMCON_PGFSM_WRITE, 0);
5420 
5421 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5422 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5423 
5424 	for (i = 0; i < 5; i++)
5425 		WREG32(GMCON_PGFSM_WRITE, 0);
5426 
5427 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5428 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5429 
5430 	for (i = 0; i < 5; i++)
5431 		WREG32(GMCON_PGFSM_WRITE, 0);
5432 
5433 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5434 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5435 
5436 	for (i = 0; i < 5; i++)
5437 		WREG32(GMCON_PGFSM_WRITE, 0);
5438 
5439 	WREG32(GMCON_PGFSM_WRITE, 0);
5440 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5441 
5442 	for (i = 0; i < 5; i++)
5443 		WREG32(GMCON_PGFSM_WRITE, 0);
5444 
5445 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5446 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5447 
5448 	for (i = 0; i < 5; i++)
5449 		WREG32(GMCON_PGFSM_WRITE, 0);
5450 
5451 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5452 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5453 
5454 	for (i = 0; i < 5; i++)
5455 		WREG32(GMCON_PGFSM_WRITE, 0);
5456 
5457 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5458 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5459 
5460 	for (i = 0; i < 5; i++)
5461 		WREG32(GMCON_PGFSM_WRITE, 0);
5462 
5463 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5464 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5465 
5466 	for (i = 0; i < 5; i++)
5467 		WREG32(GMCON_PGFSM_WRITE, 0);
5468 
5469 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5470 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5471 
5472 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5473 	WREG32(GMCON_MISC, save->gmcon_misc);
5474 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5475 }
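/*
 * The PGFSM writes above appear to be a hardware-supplied sequence
 * that repowers the memory-controller power-gating state machines on
 * KV-class APUs after a PCI config reset: each GMCON_PGFSM_CONFIG
 * value selects an FSM bank and the repeated writes step the state
 * machine.  The exact encoding is undocumented here; treat the magic
 * numbers as given.
 */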
5476 
5477 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5478 {
5479 	struct evergreen_mc_save save;
5480 	struct kv_reset_save_regs kv_save = { 0 };
5481 	u32 tmp, i;
5482 
5483 	dev_info(rdev->dev, "GPU pci config reset\n");
5484 
5485 	/* disable dpm? */
5486 
5487 	/* disable cg/pg */
5488 	cik_fini_pg(rdev);
5489 	cik_fini_cg(rdev);
5490 
5491 	/* Disable GFX parsing/prefetching */
5492 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5493 
5494 	/* Disable MEC parsing/prefetching */
5495 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5496 
5497 	/* sdma0 */
5498 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5499 	tmp |= SDMA_HALT;
5500 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5501 	/* sdma1 */
5502 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5503 	tmp |= SDMA_HALT;
5504 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5505 	/* XXX other engines? */
5506 
5507 	/* halt the rlc, disable cp internal ints */
5508 	cik_rlc_stop(rdev);
5509 
5510 	udelay(50);
5511 
5512 	/* disable mem access */
5513 	evergreen_mc_stop(rdev, &save);
5514 	if (evergreen_mc_wait_for_idle(rdev)) {
5515 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5516 	}
5517 
5518 	if (rdev->flags & RADEON_IS_IGP)
5519 		kv_save_regs_for_reset(rdev, &kv_save);
5520 
5521 	/* disable BM */
5522 	pci_clear_master(rdev->pdev);
5523 	/* reset */
5524 	radeon_pci_config_reset(rdev);
5525 
5526 	udelay(100);
5527 
5528 	/* wait for asic to come out of reset */
5529 	for (i = 0; i < rdev->usec_timeout; i++) {
5530 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5531 			break;
5532 		udelay(1);
5533 	}
5534 
5535 	/* does asic init need to be run first??? */
5536 	if (rdev->flags & RADEON_IS_IGP)
5537 		kv_restore_regs_for_reset(rdev, &kv_save);
5538 }
5539 
5540 /**
5541  * cik_asic_reset - soft reset GPU
5542  *
5543  * @rdev: radeon_device pointer
5544  *
5545  * Look up which blocks are hung and attempt
5546  * to reset them.
5547  * Returns 0 for success.
5548  */
5549 int cik_asic_reset(struct radeon_device *rdev)
5550 {
5551 	u32 reset_mask;
5552 
5553 	reset_mask = cik_gpu_check_soft_reset(rdev);
5554 
5555 	if (reset_mask)
5556 		r600_set_bios_scratch_engine_hung(rdev, true);
5557 
5558 	/* try soft reset */
5559 	cik_gpu_soft_reset(rdev, reset_mask);
5560 
5561 	reset_mask = cik_gpu_check_soft_reset(rdev);
5562 
5563 	/* try pci config reset */
5564 	if (reset_mask && radeon_hard_reset)
5565 		cik_gpu_pci_config_reset(rdev);
5566 
5567 	reset_mask = cik_gpu_check_soft_reset(rdev);
5568 
5569 	if (!reset_mask)
5570 		r600_set_bios_scratch_engine_hung(rdev, false);
5571 
5572 	return 0;
5573 }
5574 
5575 /**
5576  * cik_gfx_is_lockup - check if the 3D engine is locked up
5577  *
5578  * @rdev: radeon_device pointer
5579  * @ring: radeon_ring structure holding ring information
5580  *
5581  * Check if the 3D engine is locked up (CIK).
5582  * Returns true if the engine is locked, false if not.
5583  */
5584 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5585 {
5586 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5587 
5588 	if (!(reset_mask & (RADEON_RESET_GFX |
5589 			    RADEON_RESET_COMPUTE |
5590 			    RADEON_RESET_CP))) {
5591 		radeon_ring_lockup_update(rdev, ring);
5592 		return false;
5593 	}
5594 	return radeon_ring_test_lockup(rdev, ring);
5595 }
5596 
5597 /* MC */
5598 /**
5599  * cik_mc_program - program the GPU memory controller
5600  *
5601  * @rdev: radeon_device pointer
5602  *
5603  * Set the location of vram, gart, and AGP in the GPU's
5604  * physical address space (CIK).
5605  */
5606 static void cik_mc_program(struct radeon_device *rdev)
5607 {
5608 	struct evergreen_mc_save save;
5609 	u32 tmp;
5610 	int i, j;
5611 
5612 	/* Initialize HDP */
5613 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5614 		WREG32((0x2c14 + j), 0x00000000);
5615 		WREG32((0x2c18 + j), 0x00000000);
5616 		WREG32((0x2c1c + j), 0x00000000);
5617 		WREG32((0x2c20 + j), 0x00000000);
5618 		WREG32((0x2c24 + j), 0x00000000);
5619 	}
5620 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5621 
5622 	evergreen_mc_stop(rdev, &save);
5623 	if (radeon_mc_wait_for_idle(rdev)) {
5624 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5625 	}
5626 	/* Lockout access through VGA aperture*/
5627 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5628 	/* Update configuration */
5629 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5630 	       rdev->mc.vram_start >> 12);
5631 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5632 	       rdev->mc.vram_end >> 12);
5633 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5634 	       rdev->vram_scratch.gpu_addr >> 12);
5635 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5636 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5637 	WREG32(MC_VM_FB_LOCATION, tmp);
5638 	/* XXX double check these! */
5639 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5640 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5641 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5642 	WREG32(MC_VM_AGP_BASE, 0);
5643 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5644 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5645 	if (radeon_mc_wait_for_idle(rdev)) {
5646 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5647 	}
5648 	evergreen_mc_resume(rdev, &save);
5649 	/* we need to own VRAM, so turn off the VGA renderer here
5650 	 * to stop it from overwriting our objects */
5651 	rv515_vga_render_disable(rdev);
5652 }
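/*
 * MC_VM_FB_LOCATION packs the framebuffer range in 16MB granules:
 * bits 31:16 hold (vram_end >> 24) and bits 15:0 hold
 * (vram_start >> 24).  For example, 1GB of VRAM mapped at GPU address
 * 0 gives vram_end = 0x3FFFFFFF and a register value of 0x003F0000.
 */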
5653 
5654 /**
5655  * cik_mc_init - initialize the memory controller driver params
5656  *
5657  * @rdev: radeon_device pointer
5658  *
5659  * Look up the amount of vram, vram width, and decide how to place
5660  * vram and gart within the GPU's physical address space (CIK).
5661  * Returns 0 for success.
5662  */
5663 static int cik_mc_init(struct radeon_device *rdev)
5664 {
5665 	u32 tmp;
5666 	int chansize, numchan;
5667 
5668 	/* Get VRAM information */
5669 	rdev->mc.vram_is_ddr = true;
5670 	tmp = RREG32(MC_ARB_RAMCFG);
5671 	if (tmp & CHANSIZE_MASK) {
5672 		chansize = 64;
5673 	} else {
5674 		chansize = 32;
5675 	}
5676 	tmp = RREG32(MC_SHARED_CHMAP);
5677 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5678 	case 0:
5679 	default:
5680 		numchan = 1;
5681 		break;
5682 	case 1:
5683 		numchan = 2;
5684 		break;
5685 	case 2:
5686 		numchan = 4;
5687 		break;
5688 	case 3:
5689 		numchan = 8;
5690 		break;
5691 	case 4:
5692 		numchan = 3;
5693 		break;
5694 	case 5:
5695 		numchan = 6;
5696 		break;
5697 	case 6:
5698 		numchan = 10;
5699 		break;
5700 	case 7:
5701 		numchan = 12;
5702 		break;
5703 	case 8:
5704 		numchan = 16;
5705 		break;
5706 	}
5707 	rdev->mc.vram_width = numchan * chansize;
5708 	/* Could aper size report 0? */
5709 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5710 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5711 	/* size in MB on CIK */
5712 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5713 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5714 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5715 	si_vram_gtt_location(rdev, &rdev->mc);
5716 	radeon_update_bandwidth_info(rdev);
5717 
5718 	return 0;
5719 }
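/*
 * Worked example of the width math above: NOOFCHAN = 2 decodes to
 * four channels, so a board with 64-bit channels reports
 * vram_width = 4 * 64 = 256 bits.
 */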
5720 
5721 /*
5722  * GART
5723  * VMID 0 is the physical GPU addresses as used by the kernel.
5724  * VMIDs 1-15 are used for userspace clients and are handled
5725  * by the radeon vm/hsa code.
5726  */
5727 /**
5728  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5729  *
5730  * @rdev: radeon_device pointer
5731  *
5732  * Flush the TLB for the VMID 0 page table (CIK).
5733  */
5734 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5735 {
5736 	/* flush hdp cache */
5737 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5738 
5739 	/* bits 0-15 are the VM contexts 0-15 */
5740 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5741 }
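/*
 * Writing 0x1 only invalidates the VMID 0 (GART) TLB; per-VM flushes
 * write 1 << vmid instead, as cik_vm_flush() does below.
 */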
5742 
5743 /**
5744  * cik_pcie_gart_enable - gart enable
5745  *
5746  * @rdev: radeon_device pointer
5747  *
5748  * This sets up the TLBs, programs the page tables for VMID0,
5749  * sets up the hw for VMIDs 1-15 which are allocated on
5750  * demand, and sets up the global locations for the LDS, GDS,
5751  * and GPUVM for FSA64 clients (CIK).
5752  * Returns 0 for success, errors for failure.
5753  */
5754 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5755 {
5756 	int r, i;
5757 
5758 	if (rdev->gart.robj == NULL) {
5759 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5760 		return -EINVAL;
5761 	}
5762 	r = radeon_gart_table_vram_pin(rdev);
5763 	if (r)
5764 		return r;
5765 	/* Setup TLB control */
5766 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5767 	       (0xA << 7) |
5768 	       ENABLE_L1_TLB |
5769 	       ENABLE_L1_FRAGMENT_PROCESSING |
5770 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5771 	       ENABLE_ADVANCED_DRIVER_MODEL |
5772 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5773 	/* Setup L2 cache */
5774 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5775 	       ENABLE_L2_FRAGMENT_PROCESSING |
5776 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5777 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5778 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5779 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5780 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5781 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5782 	       BANK_SELECT(4) |
5783 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5784 	/* setup context0 */
5785 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5786 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5787 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5788 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5789 			(u32)(rdev->dummy_page.addr >> 12));
5790 	WREG32(VM_CONTEXT0_CNTL2, 0);
5791 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5792 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5793 
5794 	WREG32(0x15D4, 0);
5795 	WREG32(0x15D8, 0);
5796 	WREG32(0x15DC, 0);
5797 
5798 	/* restore context1-15 */
5799 	/* set vm size, must be a multiple of 4 */
5800 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5801 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5802 	for (i = 1; i < 16; i++) {
5803 		if (i < 8)
5804 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5805 			       rdev->vm_manager.saved_table_addr[i]);
5806 		else
5807 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5808 			       rdev->vm_manager.saved_table_addr[i]);
5809 	}
5810 
5811 	/* enable context1-15 */
5812 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5813 	       (u32)(rdev->dummy_page.addr >> 12));
5814 	WREG32(VM_CONTEXT1_CNTL2, 4);
5815 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5816 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5817 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5818 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5819 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5820 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5821 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5822 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5823 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5824 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5825 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5826 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5827 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5828 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5829 
5830 	if (rdev->family == CHIP_KAVERI) {
5831 		u32 tmp = RREG32(CHUB_CONTROL);
5832 		tmp &= ~BYPASS_VM;
5833 		WREG32(CHUB_CONTROL, tmp);
5834 	}
5835 
5836 	/* XXX SH_MEM regs */
5837 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5838 	mutex_lock(&rdev->srbm_mutex);
5839 	for (i = 0; i < 16; i++) {
5840 		cik_srbm_select(rdev, 0, 0, 0, i);
5841 		/* CP and shaders */
5842 		WREG32(SH_MEM_CONFIG, 0);
5843 		WREG32(SH_MEM_APE1_BASE, 1);
5844 		WREG32(SH_MEM_APE1_LIMIT, 0);
5845 		WREG32(SH_MEM_BASES, 0);
5846 		/* SDMA GFX */
5847 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5848 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5849 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5850 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5851 		/* XXX SDMA RLC - todo */
5852 	}
5853 	cik_srbm_select(rdev, 0, 0, 0, 0);
5854 	mutex_unlock(&rdev->srbm_mutex);
5855 
5856 	cik_pcie_gart_tlb_flush(rdev);
5857 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5858 		 (unsigned)(rdev->mc.gtt_size >> 20),
5859 		 (unsigned long long)rdev->gart.table_addr);
5860 	rdev->gart.ready = true;
5861 	return 0;
5862 }
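/*
 * Note the split register banks used above: the page table base
 * addresses for VMIDs 0-7 sit at consecutive dwords from
 * VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, while VMIDs 8-15 restart at
 * VM_CONTEXT8_PAGE_TABLE_BASE_ADDR, hence the (i - 8) indexing here,
 * in cik_pcie_gart_disable() and in cik_vm_flush().
 */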
5863 
5864 /**
5865  * cik_pcie_gart_disable - gart disable
5866  *
5867  * @rdev: radeon_device pointer
5868  *
5869  * This disables all VM page tables (CIK).
5870  */
5871 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5872 {
5873 	unsigned i;
5874 
5875 	for (i = 1; i < 16; ++i) {
5876 		uint32_t reg;
5877 		if (i < 8)
5878 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5879 		else
5880 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5881 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5882 	}
5883 
5884 	/* Disable all tables */
5885 	WREG32(VM_CONTEXT0_CNTL, 0);
5886 	WREG32(VM_CONTEXT1_CNTL, 0);
5887 	/* Setup TLB control */
5888 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5889 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5890 	/* Setup L2 cache */
5891 	WREG32(VM_L2_CNTL,
5892 	       ENABLE_L2_FRAGMENT_PROCESSING |
5893 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5894 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5895 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5896 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5897 	WREG32(VM_L2_CNTL2, 0);
5898 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5899 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5900 	radeon_gart_table_vram_unpin(rdev);
5901 }
5902 
5903 /**
5904  * cik_pcie_gart_fini - vm fini callback
5905  *
5906  * @rdev: radeon_device pointer
5907  *
5908  * Tears down the driver GART/VM setup (CIK).
5909  */
5910 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5911 {
5912 	cik_pcie_gart_disable(rdev);
5913 	radeon_gart_table_vram_free(rdev);
5914 	radeon_gart_fini(rdev);
5915 }
5916 
5917 /* vm parser */
5918 /**
5919  * cik_ib_parse - vm ib_parse callback
5920  *
5921  * @rdev: radeon_device pointer
5922  * @ib: indirect buffer pointer
5923  *
5924  * CIK uses hw IB checking so this is a nop (CIK).
5925  */
5926 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5927 {
5928 	return 0;
5929 }
5930 
5931 /*
5932  * vm
5933  * VMID 0 is the physical GPU addresses as used by the kernel.
5934  * VMIDs 1-15 are used for userspace clients and are handled
5935  * by the radeon vm/hsa code.
5936  */
5937 /**
5938  * cik_vm_init - cik vm init callback
5939  *
5940  * @rdev: radeon_device pointer
5941  *
5942  * Inits cik specific vm parameters (number of VMs, base of vram for
5943  * VMIDs 1-15) (CIK).
5944  * Returns 0 for success.
5945  */
5946 int cik_vm_init(struct radeon_device *rdev)
5947 {
5948 	/* number of VMs */
5949 	rdev->vm_manager.nvm = 16;
5950 	/* base offset of vram pages */
5951 	if (rdev->flags & RADEON_IS_IGP) {
5952 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5953 		tmp <<= 22;
5954 		rdev->vm_manager.vram_base_offset = tmp;
5955 	} else
5956 		rdev->vm_manager.vram_base_offset = 0;
5957 
5958 	return 0;
5959 }
5960 
5961 /**
5962  * cik_vm_fini - cik vm fini callback
5963  *
5964  * @rdev: radeon_device pointer
5965  *
5966  * Tear down any asic specific VM setup (CIK).
5967  */
5968 void cik_vm_fini(struct radeon_device *rdev)
5969 {
5970 }
5971 
5972 /**
5973  * cik_vm_decode_fault - print human readable fault info
5974  *
5975  * @rdev: radeon_device pointer
5976  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5977  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5978  *
5979  * Print human readable fault information (CIK).
5980  */
5981 static void cik_vm_decode_fault(struct radeon_device *rdev,
5982 				u32 status, u32 addr, u32 mc_client)
5983 {
5984 	u32 mc_id;
5985 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5986 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5987 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5988 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5989 
5990 	if (rdev->family == CHIP_HAWAII)
5991 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5992 	else
5993 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5994 
5995 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5996 	       protections, vmid, addr,
5997 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5998 	       block, mc_client, mc_id);
5999 }
6000 
6001 /**
6002  * cik_vm_flush - cik vm flush using the CP
6003  *
6004  * @rdev: radeon_device pointer
 * @ridx: index of the ring doing the flush
 * @vm: VM to flush for
6005  *
6006  * Update the page table base and flush the VM TLB
6007  * using the CP (CIK).
6008  */
6009 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
6010 {
6011 	struct radeon_ring *ring = &rdev->ring[ridx];
6012 	int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
6013 
6014 	if (vm == NULL)
6015 		return;
6016 
6017 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6018 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6019 				 WRITE_DATA_DST_SEL(0)));
6020 	if (vm->id < 8) {
6021 		radeon_ring_write(ring,
6022 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
6023 	} else {
6024 		radeon_ring_write(ring,
6025 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
6026 	}
6027 	radeon_ring_write(ring, 0);
6028 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
6029 
6030 	/* update SH_MEM_* regs */
6031 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6032 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6033 				 WRITE_DATA_DST_SEL(0)));
6034 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6035 	radeon_ring_write(ring, 0);
6036 	radeon_ring_write(ring, VMID(vm->id));
6037 
6038 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6039 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6040 				 WRITE_DATA_DST_SEL(0)));
6041 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6042 	radeon_ring_write(ring, 0);
6043 
6044 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6045 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6046 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6047 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6048 
6049 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6050 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6051 				 WRITE_DATA_DST_SEL(0)));
6052 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6053 	radeon_ring_write(ring, 0);
6054 	radeon_ring_write(ring, VMID(0));
6055 
6056 	/* HDP flush */
6057 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
6058 
6059 	/* bits 0-15 are the VM contexts 0-15 */
6060 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6061 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6062 				 WRITE_DATA_DST_SEL(0)));
6063 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6064 	radeon_ring_write(ring, 0);
6065 	radeon_ring_write(ring, 1 << vm->id);
6066 
6067 	/* compute doesn't have PFP */
6068 	if (usepfp) {
6069 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6070 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6071 		radeon_ring_write(ring, 0x0);
6072 	}
6073 }
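/*
 * Packet sizing in the flush above: PACKET3(PACKET3_WRITE_DATA, n) is
 * followed by n + 1 dwords (control word, destination dword offset,
 * upper address, then the payload).  The count-3 packets carry one
 * register value each; the count-6 packet batches the four SH_MEM_*
 * values behind a single SH_MEM_BASES destination, relying on the
 * write address incrementing per data dword.
 */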
6074 
6075 /*
6076  * RLC
6077  * The RLC is a multi-purpose microengine that handles a
6078  * variety of functions, the most important of which is
6079  * the interrupt controller.
6080  */
6081 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6082 					  bool enable)
6083 {
6084 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6085 
6086 	if (enable)
6087 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6088 	else
6089 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6090 	WREG32(CP_INT_CNTL_RING0, tmp);
6091 }
6092 
6093 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6094 {
6095 	u32 tmp;
6096 
6097 	tmp = RREG32(RLC_LB_CNTL);
6098 	if (enable)
6099 		tmp |= LOAD_BALANCE_ENABLE;
6100 	else
6101 		tmp &= ~LOAD_BALANCE_ENABLE;
6102 	WREG32(RLC_LB_CNTL, tmp);
6103 }
6104 
6105 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6106 {
6107 	u32 i, j, k;
6108 	u32 mask;
6109 
6110 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6111 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6112 			cik_select_se_sh(rdev, i, j);
6113 			for (k = 0; k < rdev->usec_timeout; k++) {
6114 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6115 					break;
6116 				udelay(1);
6117 			}
6118 		}
6119 	}
6120 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6121 
6122 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6123 	for (k = 0; k < rdev->usec_timeout; k++) {
6124 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6125 			break;
6126 		udelay(1);
6127 	}
6128 }
6129 
6130 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6131 {
6132 	u32 tmp;
6133 
6134 	tmp = RREG32(RLC_CNTL);
6135 	if (tmp != rlc)
6136 		WREG32(RLC_CNTL, rlc);
6137 }
6138 
6139 static u32 cik_halt_rlc(struct radeon_device *rdev)
6140 {
6141 	u32 data, orig;
6142 
6143 	orig = data = RREG32(RLC_CNTL);
6144 
6145 	if (data & RLC_ENABLE) {
6146 		u32 i;
6147 
6148 		data &= ~RLC_ENABLE;
6149 		WREG32(RLC_CNTL, data);
6150 
6151 		for (i = 0; i < rdev->usec_timeout; i++) {
6152 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6153 				break;
6154 			udelay(1);
6155 		}
6156 
6157 		cik_wait_for_rlc_serdes(rdev);
6158 	}
6159 
6160 	return orig;
6161 }
6162 
6163 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6164 {
6165 	u32 tmp, i, mask;
6166 
6167 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6168 	WREG32(RLC_GPR_REG2, tmp);
6169 
6170 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6171 	for (i = 0; i < rdev->usec_timeout; i++) {
6172 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6173 			break;
6174 		udelay(1);
6175 	}
6176 
6177 	for (i = 0; i < rdev->usec_timeout; i++) {
6178 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6179 			break;
6180 		udelay(1);
6181 	}
6182 }
6183 
6184 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6185 {
6186 	u32 tmp;
6187 
6188 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6189 	WREG32(RLC_GPR_REG2, tmp);
6190 }
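/*
 * Safe-mode handshake: the driver raises REQ in RLC_GPR_REG2 along
 * with an enter/exit message, and the RLC firmware clears REQ once it
 * has quiesced (or released) the graphics pipeline.  Entry
 * additionally polls RLC_GPM_STAT for GFX_POWER_STATUS |
 * GFX_CLOCK_STATUS before trusting the ack; exit is fire-and-forget.
 */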
6191 
6192 /**
6193  * cik_rlc_stop - stop the RLC ME
6194  *
6195  * @rdev: radeon_device pointer
6196  *
6197  * Halt the RLC ME (MicroEngine) (CIK).
6198  */
6199 static void cik_rlc_stop(struct radeon_device *rdev)
6200 {
6201 	WREG32(RLC_CNTL, 0);
6202 
6203 	cik_enable_gui_idle_interrupt(rdev, false);
6204 
6205 	cik_wait_for_rlc_serdes(rdev);
6206 }
6207 
6208 /**
6209  * cik_rlc_start - start the RLC ME
6210  *
6211  * @rdev: radeon_device pointer
6212  *
6213  * Unhalt the RLC ME (MicroEngine) (CIK).
6214  */
6215 static void cik_rlc_start(struct radeon_device *rdev)
6216 {
6217 	WREG32(RLC_CNTL, RLC_ENABLE);
6218 
6219 	cik_enable_gui_idle_interrupt(rdev, true);
6220 
6221 	udelay(50);
6222 }
6223 
6224 /**
6225  * cik_rlc_resume - setup the RLC hw
6226  *
6227  * @rdev: radeon_device pointer
6228  *
6229  * Initialize the RLC registers, load the ucode,
6230  * and start the RLC (CIK).
6231  * Returns 0 for success, -EINVAL if the ucode is not available.
6232  */
6233 static int cik_rlc_resume(struct radeon_device *rdev)
6234 {
6235 	u32 i, size, tmp;
6236 
6237 	if (!rdev->rlc_fw)
6238 		return -EINVAL;
6239 
6240 	cik_rlc_stop(rdev);
6241 
6242 	/* disable CG */
6243 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6244 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6245 
6246 	si_rlc_reset(rdev);
6247 
6248 	cik_init_pg(rdev);
6249 
6250 	cik_init_cg(rdev);
6251 
6252 	WREG32(RLC_LB_CNTR_INIT, 0);
6253 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6254 
6255 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6256 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6257 	WREG32(RLC_LB_PARAMS, 0x00600408);
6258 	WREG32(RLC_LB_CNTL, 0x80000004);
6259 
6260 	WREG32(RLC_MC_CNTL, 0);
6261 	WREG32(RLC_UCODE_CNTL, 0);
6262 
6263 	if (rdev->new_fw) {
6264 		const struct rlc_firmware_header_v1_0 *hdr =
6265 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6266 		const __le32 *fw_data = (const __le32 *)
6267 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6268 
6269 		radeon_ucode_print_rlc_hdr(&hdr->header);
6270 
6271 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6272 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6273 		for (i = 0; i < size; i++)
6274 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6275 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6276 	} else {
6277 		const __be32 *fw_data;
6278 
6279 		switch (rdev->family) {
6280 		case CHIP_BONAIRE:
6281 		case CHIP_HAWAII:
6282 		default:
6283 			size = BONAIRE_RLC_UCODE_SIZE;
6284 			break;
6285 		case CHIP_KAVERI:
6286 			size = KV_RLC_UCODE_SIZE;
6287 			break;
6288 		case CHIP_KABINI:
6289 			size = KB_RLC_UCODE_SIZE;
6290 			break;
6291 		case CHIP_MULLINS:
6292 			size = ML_RLC_UCODE_SIZE;
6293 			break;
6294 		}
6295 
6296 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6297 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6298 		for (i = 0; i < size; i++)
6299 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6300 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6301 	}
6302 
6303 	/* XXX - find out what chips support lbpw */
6304 	cik_enable_lbpw(rdev, false);
6305 
6306 	if (rdev->family == CHIP_BONAIRE)
6307 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6308 
6309 	cik_rlc_start(rdev);
6310 
6311 	return 0;
6312 }
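/*
 * The ucode upload above uses the RLC_GPM_UCODE_ADDR/DATA pair as a
 * streaming port: zero the address once, then every DATA write stores
 * the next dword and advances the address automatically.  New-style
 * firmware finishes by writing the ucode version to the address
 * register; legacy firmware writes 0 back.
 */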
6313 
6314 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6315 {
6316 	u32 data, orig, tmp, tmp2;
6317 
6318 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6319 
6320 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6321 		cik_enable_gui_idle_interrupt(rdev, true);
6322 
6323 		tmp = cik_halt_rlc(rdev);
6324 
6325 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6326 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6327 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6328 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6329 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6330 
6331 		cik_update_rlc(rdev, tmp);
6332 
6333 		data |= CGCG_EN | CGLS_EN;
6334 	} else {
6335 		cik_enable_gui_idle_interrupt(rdev, false);
6336 
6337 		RREG32(CB_CGTT_SCLK_CTRL);
6338 		RREG32(CB_CGTT_SCLK_CTRL);
6339 		RREG32(CB_CGTT_SCLK_CTRL);
6340 		RREG32(CB_CGTT_SCLK_CTRL);
6341 
6342 		data &= ~(CGCG_EN | CGLS_EN);
6343 	}
6344 
6345 	if (orig != data)
6346 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6347 
6348 }
6349 
6350 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6351 {
6352 	u32 data, orig, tmp = 0;
6353 
6354 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6355 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6356 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6357 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6358 				data |= CP_MEM_LS_EN;
6359 				if (orig != data)
6360 					WREG32(CP_MEM_SLP_CNTL, data);
6361 			}
6362 		}
6363 
6364 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6365 		data |= 0x00000001;
6366 		data &= 0xfffffffd;
6367 		if (orig != data)
6368 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6369 
6370 		tmp = cik_halt_rlc(rdev);
6371 
6372 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6373 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6374 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6375 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6376 		WREG32(RLC_SERDES_WR_CTRL, data);
6377 
6378 		cik_update_rlc(rdev, tmp);
6379 
6380 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6381 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6382 			data &= ~SM_MODE_MASK;
6383 			data |= SM_MODE(0x2);
6384 			data |= SM_MODE_ENABLE;
6385 			data &= ~CGTS_OVERRIDE;
6386 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6387 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6388 				data &= ~CGTS_LS_OVERRIDE;
6389 			data &= ~ON_MONITOR_ADD_MASK;
6390 			data |= ON_MONITOR_ADD_EN;
6391 			data |= ON_MONITOR_ADD(0x96);
6392 			if (orig != data)
6393 				WREG32(CGTS_SM_CTRL_REG, data);
6394 		}
6395 	} else {
6396 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6397 		data |= 0x00000003;
6398 		if (orig != data)
6399 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6400 
6401 		data = RREG32(RLC_MEM_SLP_CNTL);
6402 		if (data & RLC_MEM_LS_EN) {
6403 			data &= ~RLC_MEM_LS_EN;
6404 			WREG32(RLC_MEM_SLP_CNTL, data);
6405 		}
6406 
6407 		data = RREG32(CP_MEM_SLP_CNTL);
6408 		if (data & CP_MEM_LS_EN) {
6409 			data &= ~CP_MEM_LS_EN;
6410 			WREG32(CP_MEM_SLP_CNTL, data);
6411 		}
6412 
6413 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6414 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6415 		if (orig != data)
6416 			WREG32(CGTS_SM_CTRL_REG, data);
6417 
6418 		tmp = cik_halt_rlc(rdev);
6419 
6420 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6421 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6422 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6423 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6424 		WREG32(RLC_SERDES_WR_CTRL, data);
6425 
6426 		cik_update_rlc(rdev, tmp);
6427 	}
6428 }
6429 
6430 static const u32 mc_cg_registers[] =
6431 {
6432 	MC_HUB_MISC_HUB_CG,
6433 	MC_HUB_MISC_SIP_CG,
6434 	MC_HUB_MISC_VM_CG,
6435 	MC_XPB_CLK_GAT,
6436 	ATC_MISC_CG,
6437 	MC_CITF_MISC_WR_CG,
6438 	MC_CITF_MISC_RD_CG,
6439 	MC_CITF_MISC_VM_CG,
6440 	VM_L2_CG,
6441 };
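/*
 * These MC hub/ATC/VM L2 registers share the same clock-gating bit
 * layout, so the two helpers below can run an identical
 * read-modify-write loop, toggling MC_LS_ENABLE or MC_CG_ENABLE across
 * the whole list and writing back only the registers that changed.
 */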
6442 
6443 static void cik_enable_mc_ls(struct radeon_device *rdev,
6444 			     bool enable)
6445 {
6446 	int i;
6447 	u32 orig, data;
6448 
6449 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6450 		orig = data = RREG32(mc_cg_registers[i]);
6451 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6452 			data |= MC_LS_ENABLE;
6453 		else
6454 			data &= ~MC_LS_ENABLE;
6455 		if (data != orig)
6456 			WREG32(mc_cg_registers[i], data);
6457 	}
6458 }
6459 
6460 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6461 			       bool enable)
6462 {
6463 	int i;
6464 	u32 orig, data;
6465 
6466 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6467 		orig = data = RREG32(mc_cg_registers[i]);
6468 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6469 			data |= MC_CG_ENABLE;
6470 		else
6471 			data &= ~MC_CG_ENABLE;
6472 		if (data != orig)
6473 			WREG32(mc_cg_registers[i], data);
6474 	}
6475 }
6476 
6477 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6478 				 bool enable)
6479 {
6480 	u32 orig, data;
6481 
6482 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6483 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6484 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6485 	} else {
6486 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6487 		data |= 0xff000000;
6488 		if (data != orig)
6489 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6490 
6491 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6492 		data |= 0xff000000;
6493 		if (data != orig)
6494 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6495 	}
6496 }
6497 
6498 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6499 				 bool enable)
6500 {
6501 	u32 orig, data;
6502 
6503 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6504 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6505 		data |= 0x100;
6506 		if (orig != data)
6507 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6508 
6509 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6510 		data |= 0x100;
6511 		if (orig != data)
6512 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6513 	} else {
6514 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6515 		data &= ~0x100;
6516 		if (orig != data)
6517 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6518 
6519 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6520 		data &= ~0x100;
6521 		if (orig != data)
6522 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6523 	}
6524 }
6525 
6526 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6527 				bool enable)
6528 {
6529 	u32 orig, data;
6530 
6531 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6532 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6533 		data = 0xfff;
6534 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6535 
6536 		orig = data = RREG32(UVD_CGC_CTRL);
6537 		data |= DCM;
6538 		if (orig != data)
6539 			WREG32(UVD_CGC_CTRL, data);
6540 	} else {
6541 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6542 		data &= ~0xfff;
6543 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6544 
6545 		orig = data = RREG32(UVD_CGC_CTRL);
6546 		data &= ~DCM;
6547 		if (orig != data)
6548 			WREG32(UVD_CGC_CTRL, data);
6549 	}
6550 }
6551 
6552 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6553 			       bool enable)
6554 {
6555 	u32 orig, data;
6556 
6557 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6558 
6559 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6560 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6561 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6562 	else
6563 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6564 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6565 
6566 	if (orig != data)
6567 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6568 }
6569 
6570 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6571 				bool enable)
6572 {
6573 	u32 orig, data;
6574 
6575 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6576 
6577 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6578 		data &= ~CLOCK_GATING_DIS;
6579 	else
6580 		data |= CLOCK_GATING_DIS;
6581 
6582 	if (orig != data)
6583 		WREG32(HDP_HOST_PATH_CNTL, data);
6584 }
6585 
6586 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6587 			      bool enable)
6588 {
6589 	u32 orig, data;
6590 
6591 	orig = data = RREG32(HDP_MEM_POWER_LS);
6592 
6593 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6594 		data |= HDP_LS_ENABLE;
6595 	else
6596 		data &= ~HDP_LS_ENABLE;
6597 
6598 	if (orig != data)
6599 		WREG32(HDP_MEM_POWER_LS, data);
6600 }
6601 
6602 void cik_update_cg(struct radeon_device *rdev,
6603 		   u32 block, bool enable)
6604 {
6605 
6606 	if (block & RADEON_CG_BLOCK_GFX) {
6607 		cik_enable_gui_idle_interrupt(rdev, false);
6608 		/* order matters! */
6609 		if (enable) {
6610 			cik_enable_mgcg(rdev, true);
6611 			cik_enable_cgcg(rdev, true);
6612 		} else {
6613 			cik_enable_cgcg(rdev, false);
6614 			cik_enable_mgcg(rdev, false);
6615 		}
6616 		cik_enable_gui_idle_interrupt(rdev, true);
6617 	}
6618 
6619 	if (block & RADEON_CG_BLOCK_MC) {
6620 		if (!(rdev->flags & RADEON_IS_IGP)) {
6621 			cik_enable_mc_mgcg(rdev, enable);
6622 			cik_enable_mc_ls(rdev, enable);
6623 		}
6624 	}
6625 
6626 	if (block & RADEON_CG_BLOCK_SDMA) {
6627 		cik_enable_sdma_mgcg(rdev, enable);
6628 		cik_enable_sdma_mgls(rdev, enable);
6629 	}
6630 
6631 	if (block & RADEON_CG_BLOCK_BIF) {
6632 		cik_enable_bif_mgls(rdev, enable);
6633 	}
6634 
6635 	if (block & RADEON_CG_BLOCK_UVD) {
6636 		if (rdev->has_uvd)
6637 			cik_enable_uvd_mgcg(rdev, enable);
6638 	}
6639 
6640 	if (block & RADEON_CG_BLOCK_HDP) {
6641 		cik_enable_hdp_mgcg(rdev, enable);
6642 		cik_enable_hdp_ls(rdev, enable);
6643 	}
6644 
6645 	if (block & RADEON_CG_BLOCK_VCE) {
6646 		vce_v2_0_enable_mgcg(rdev, enable);
6647 	}
6648 }
6649 
6650 static void cik_init_cg(struct radeon_device *rdev)
6651 {
6652 
6653 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6654 
6655 	if (rdev->has_uvd)
6656 		si_init_uvd_internal_cg(rdev);
6657 
6658 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6659 			     RADEON_CG_BLOCK_SDMA |
6660 			     RADEON_CG_BLOCK_BIF |
6661 			     RADEON_CG_BLOCK_UVD |
6662 			     RADEON_CG_BLOCK_HDP), true);
6663 }
6664 
6665 static void cik_fini_cg(struct radeon_device *rdev)
6666 {
6667 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6668 			     RADEON_CG_BLOCK_SDMA |
6669 			     RADEON_CG_BLOCK_BIF |
6670 			     RADEON_CG_BLOCK_UVD |
6671 			     RADEON_CG_BLOCK_HDP), false);
6672 
6673 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6674 }
6675 
6676 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6677 					  bool enable)
6678 {
6679 	u32 data, orig;
6680 
6681 	orig = data = RREG32(RLC_PG_CNTL);
6682 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6683 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6684 	else
6685 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6686 	if (orig != data)
6687 		WREG32(RLC_PG_CNTL, data);
6688 }
6689 
6690 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6691 					  bool enable)
6692 {
6693 	u32 data, orig;
6694 
6695 	orig = data = RREG32(RLC_PG_CNTL);
6696 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6697 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6698 	else
6699 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6700 	if (orig != data)
6701 		WREG32(RLC_PG_CNTL, data);
6702 }
6703 
6704 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6705 {
6706 	u32 data, orig;
6707 
6708 	orig = data = RREG32(RLC_PG_CNTL);
6709 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6710 		data &= ~DISABLE_CP_PG;
6711 	else
6712 		data |= DISABLE_CP_PG;
6713 	if (orig != data)
6714 		WREG32(RLC_PG_CNTL, data);
6715 }
6716 
6717 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6718 {
6719 	u32 data, orig;
6720 
6721 	orig = data = RREG32(RLC_PG_CNTL);
6722 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6723 		data &= ~DISABLE_GDS_PG;
6724 	else
6725 		data |= DISABLE_GDS_PG;
6726 	if (orig != data)
6727 		WREG32(RLC_PG_CNTL, data);
6728 }
6729 
6730 #define CP_ME_TABLE_SIZE    96
6731 #define CP_ME_TABLE_OFFSET  2048
6732 #define CP_MEC_TABLE_OFFSET 4096
6733 
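/*
 * cik_init_cp_pg_table() below copies the jump table of each CP
 * microcode block (CE, PFP, ME, MEC, plus MEC2 on Kaveri) into the CP
 * table buffer the RLC restores from when powering the CP back up.
 * New-style firmware carries the table offset/size in its little-endian
 * header; legacy blobs are big-endian and use the fixed offsets defined
 * above.
 */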
6734 void cik_init_cp_pg_table(struct radeon_device *rdev)
6735 {
6736 	volatile u32 *dst_ptr;
6737 	int me, i, max_me = 4;
6738 	u32 bo_offset = 0;
6739 	u32 table_offset, table_size;
6740 
6741 	if (rdev->family == CHIP_KAVERI)
6742 		max_me = 5;
6743 
6744 	if (rdev->rlc.cp_table_ptr == NULL)
6745 		return;
6746 
6747 	/* write the cp table buffer */
6748 	dst_ptr = rdev->rlc.cp_table_ptr;
6749 	for (me = 0; me < max_me; me++) {
6750 		if (rdev->new_fw) {
6751 			const __le32 *fw_data;
6752 			const struct gfx_firmware_header_v1_0 *hdr;
6753 
6754 			if (me == 0) {
6755 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6756 				fw_data = (const __le32 *)
6757 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6758 				table_offset = le32_to_cpu(hdr->jt_offset);
6759 				table_size = le32_to_cpu(hdr->jt_size);
6760 			} else if (me == 1) {
6761 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6762 				fw_data = (const __le32 *)
6763 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6764 				table_offset = le32_to_cpu(hdr->jt_offset);
6765 				table_size = le32_to_cpu(hdr->jt_size);
6766 			} else if (me == 2) {
6767 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6768 				fw_data = (const __le32 *)
6769 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6770 				table_offset = le32_to_cpu(hdr->jt_offset);
6771 				table_size = le32_to_cpu(hdr->jt_size);
6772 			} else if (me == 3) {
6773 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6774 				fw_data = (const __le32 *)
6775 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6776 				table_offset = le32_to_cpu(hdr->jt_offset);
6777 				table_size = le32_to_cpu(hdr->jt_size);
6778 			} else {
6779 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6780 				fw_data = (const __le32 *)
6781 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6782 				table_offset = le32_to_cpu(hdr->jt_offset);
6783 				table_size = le32_to_cpu(hdr->jt_size);
6784 			}
6785 
6786 			for (i = 0; i < table_size; i++) {
6787 				dst_ptr[bo_offset + i] =
6788 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6789 			}
6790 			bo_offset += table_size;
6791 		} else {
6792 			const __be32 *fw_data;
6793 			table_size = CP_ME_TABLE_SIZE;
6794 
6795 			if (me == 0) {
6796 				fw_data = (const __be32 *)rdev->ce_fw->data;
6797 				table_offset = CP_ME_TABLE_OFFSET;
6798 			} else if (me == 1) {
6799 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6800 				table_offset = CP_ME_TABLE_OFFSET;
6801 			} else if (me == 2) {
6802 				fw_data = (const __be32 *)rdev->me_fw->data;
6803 				table_offset = CP_ME_TABLE_OFFSET;
6804 			} else {
6805 				fw_data = (const __be32 *)rdev->mec_fw->data;
6806 				table_offset = CP_MEC_TABLE_OFFSET;
6807 			}
6808 
6809 			for (i = 0; i < table_size; i++) {
6810 				dst_ptr[bo_offset + i] =
6811 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6812 			}
6813 			bo_offset += table_size;
6814 		}
6815 	}
6816 }
6817 
6818 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6819 				bool enable)
6820 {
6821 	u32 data, orig;
6822 
6823 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6824 		orig = data = RREG32(RLC_PG_CNTL);
6825 		data |= GFX_PG_ENABLE;
6826 		if (orig != data)
6827 			WREG32(RLC_PG_CNTL, data);
6828 
6829 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6830 		data |= AUTO_PG_EN;
6831 		if (orig != data)
6832 			WREG32(RLC_AUTO_PG_CTRL, data);
6833 	} else {
6834 		orig = data = RREG32(RLC_PG_CNTL);
6835 		data &= ~GFX_PG_ENABLE;
6836 		if (orig != data)
6837 			WREG32(RLC_PG_CNTL, data);
6838 
6839 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6840 		data &= ~AUTO_PG_EN;
6841 		if (orig != data)
6842 			WREG32(RLC_AUTO_PG_CTRL, data);
6843 
6844 		data = RREG32(DB_RENDER_CONTROL);
6845 	}
6846 }
6847 
6848 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6849 {
6850 	u32 mask = 0, tmp, tmp1;
6851 	int i;
6852 
6853 	cik_select_se_sh(rdev, se, sh);
6854 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6855 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6856 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6857 
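	/*
	 * The disabled-CU bits live in the upper half of these config
	 * registers: OR in the user-disabled bits, shift down, then
	 * invert against a (1 << max_cu_per_sh) - 1 mask to get the
	 * active-CU bitmap.
	 */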
6858 	tmp &= 0xffff0000;
6859 
6860 	tmp |= tmp1;
6861 	tmp >>= 16;
6862 
6863 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6864 		mask <<= 1;
6865 		mask |= 1;
6866 	}
6867 
6868 	return (~tmp) & mask;
6869 }
6870 
6871 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6872 {
6873 	u32 i, j, k, active_cu_number = 0;
6874 	u32 mask, counter, cu_bitmap;
6875 	u32 tmp = 0;
6876 
6877 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6878 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6879 			mask = 1;
6880 			cu_bitmap = 0;
6881 			counter = 0;
6882 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6883 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6884 					if (counter < 2)
6885 						cu_bitmap |= mask;
6886 					counter++;
6887 				}
6888 				mask <<= 1;
6889 			}
6890 
6891 			active_cu_number += counter;
6892 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6893 		}
6894 	}
6895 
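	/* the always-on mask packs one byte per SH (two SHs per SE),
	 * keeping at most the first two active CUs of each SH always on
	 */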
6896 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6897 
6898 	tmp = RREG32(RLC_MAX_PG_CU);
6899 	tmp &= ~MAX_PU_CU_MASK;
6900 	tmp |= MAX_PU_CU(active_cu_number);
6901 	WREG32(RLC_MAX_PG_CU, tmp);
6902 }
6903 
6904 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6905 				       bool enable)
6906 {
6907 	u32 data, orig;
6908 
6909 	orig = data = RREG32(RLC_PG_CNTL);
6910 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6911 		data |= STATIC_PER_CU_PG_ENABLE;
6912 	else
6913 		data &= ~STATIC_PER_CU_PG_ENABLE;
6914 	if (orig != data)
6915 		WREG32(RLC_PG_CNTL, data);
6916 }
6917 
6918 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6919 					bool enable)
6920 {
6921 	u32 data, orig;
6922 
6923 	orig = data = RREG32(RLC_PG_CNTL);
6924 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6925 		data |= DYN_PER_CU_PG_ENABLE;
6926 	else
6927 		data &= ~DYN_PER_CU_PG_ENABLE;
6928 	if (orig != data)
6929 		WREG32(RLC_PG_CNTL, data);
6930 }
6931 
6932 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6933 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6934 
6935 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6936 {
6937 	u32 data, orig;
6938 	u32 i;
6939 
6940 	if (rdev->rlc.cs_data) {
6941 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6942 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6943 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6944 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6945 	} else {
6946 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6947 		for (i = 0; i < 3; i++)
6948 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6949 	}
6950 	if (rdev->rlc.reg_list) {
6951 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6952 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6953 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6954 	}
6955 
6956 	orig = data = RREG32(RLC_PG_CNTL);
6957 	data |= GFX_PG_SRC;
6958 	if (orig != data)
6959 		WREG32(RLC_PG_CNTL, data);
6960 
6961 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6962 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6963 
6964 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6965 	data &= ~IDLE_POLL_COUNT_MASK;
6966 	data |= IDLE_POLL_COUNT(0x60);
6967 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6968 
6969 	data = 0x10101010;
6970 	WREG32(RLC_PG_DELAY, data);
6971 
6972 	data = RREG32(RLC_PG_DELAY_2);
6973 	data &= ~0xff;
6974 	data |= 0x3;
6975 	WREG32(RLC_PG_DELAY_2, data);
6976 
6977 	data = RREG32(RLC_AUTO_PG_CTRL);
6978 	data &= ~GRBM_REG_SGIT_MASK;
6979 	data |= GRBM_REG_SGIT(0x700);
6980 	WREG32(RLC_AUTO_PG_CTRL, data);
6981 
6982 }
6983 
6984 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6985 {
6986 	cik_enable_gfx_cgpg(rdev, enable);
6987 	cik_enable_gfx_static_mgpg(rdev, enable);
6988 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6989 }
6990 
6991 u32 cik_get_csb_size(struct radeon_device *rdev)
6992 {
6993 	u32 count = 0;
6994 	const struct cs_section_def *sect = NULL;
6995 	const struct cs_extent_def *ext = NULL;
6996 
6997 	if (rdev->rlc.cs_data == NULL)
6998 		return 0;
6999 
7000 	/* begin clear state */
7001 	count += 2;
7002 	/* context control state */
7003 	count += 3;
7004 
7005 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7006 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7007 			if (sect->id == SECT_CONTEXT)
7008 				count += 2 + ext->reg_count;
7009 			else
7010 				return 0;
7011 		}
7012 	}
7013 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7014 	count += 4;
7015 	/* end clear state */
7016 	count += 2;
7017 	/* clear state */
7018 	count += 2;
7019 
7020 	return count;
7021 }
7022 
7023 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7024 {
7025 	u32 count = 0, i;
7026 	const struct cs_section_def *sect = NULL;
7027 	const struct cs_extent_def *ext = NULL;
7028 
7029 	if (rdev->rlc.cs_data == NULL)
7030 		return;
7031 	if (buffer == NULL)
7032 		return;
7033 
7034 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7035 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7036 
7037 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7038 	buffer[count++] = cpu_to_le32(0x80000000);
7039 	buffer[count++] = cpu_to_le32(0x80000000);
7040 
7041 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7042 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7043 			if (sect->id == SECT_CONTEXT) {
7044 				buffer[count++] =
7045 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7046 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7047 				for (i = 0; i < ext->reg_count; i++)
7048 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7049 			} else {
7050 				return;
7051 			}
7052 		}
7053 	}
7054 
7055 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7056 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7057 	switch (rdev->family) {
7058 	case CHIP_BONAIRE:
7059 		buffer[count++] = cpu_to_le32(0x16000012);
7060 		buffer[count++] = cpu_to_le32(0x00000000);
7061 		break;
7062 	case CHIP_KAVERI:
7063 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7064 		buffer[count++] = cpu_to_le32(0x00000000);
7065 		break;
7066 	case CHIP_KABINI:
7067 	case CHIP_MULLINS:
7068 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7069 		buffer[count++] = cpu_to_le32(0x00000000);
7070 		break;
7071 	case CHIP_HAWAII:
7072 		buffer[count++] = cpu_to_le32(0x3a00161a);
7073 		buffer[count++] = cpu_to_le32(0x0000002e);
7074 		break;
7075 	default:
7076 		buffer[count++] = cpu_to_le32(0x00000000);
7077 		buffer[count++] = cpu_to_le32(0x00000000);
7078 		break;
7079 	}
7080 
7081 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7082 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7083 
7084 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7085 	buffer[count++] = cpu_to_le32(0);
7086 }
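
/*
 * Editor's illustrative sketch (not driver code): the two CSB helpers
 * above pair up as a size query plus a fill; the destination buffer
 * must hold cik_get_csb_size() dwords.  The function name here is
 * hypothetical.
 */
static void __maybe_unused example_fill_csb(struct radeon_device *rdev,
					    volatile u32 *buffer)
{
	u32 dws = cik_get_csb_size(rdev);	/* 0 when there is no cs_data */

	if (dws)
		cik_get_csb_buffer(rdev, buffer);
}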
7087 
7088 static void cik_init_pg(struct radeon_device *rdev)
7089 {
7090 	if (rdev->pg_flags) {
7091 		cik_enable_sck_slowdown_on_pu(rdev, true);
7092 		cik_enable_sck_slowdown_on_pd(rdev, true);
7093 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7094 			cik_init_gfx_cgpg(rdev);
7095 			cik_enable_cp_pg(rdev, true);
7096 			cik_enable_gds_pg(rdev, true);
7097 		}
7098 		cik_init_ao_cu_mask(rdev);
7099 		cik_update_gfx_pg(rdev, true);
7100 	}
7101 }
7102 
7103 static void cik_fini_pg(struct radeon_device *rdev)
7104 {
7105 	if (rdev->pg_flags) {
7106 		cik_update_gfx_pg(rdev, false);
7107 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7108 			cik_enable_cp_pg(rdev, false);
7109 			cik_enable_gds_pg(rdev, false);
7110 		}
7111 	}
7112 }
7113 
7114 /*
7115  * Interrupts
7116  * Starting with r6xx, interrupts are handled via a ring buffer.
7117  * Ring buffers are areas of GPU accessible memory that the GPU
7118  * writes interrupt vectors into and the host reads vectors out of.
7119  * There is a rptr (read pointer) that determines where the
7120  * host is currently reading, and a wptr (write pointer)
7121  * which determines where the GPU has written.  When the
7122  * pointers are equal, the ring is idle.  When the GPU
7123  * writes vectors to the ring buffer, it increments the
7124  * wptr.  When there is an interrupt, the host then starts
7125  * fetching vectors and processing them until the pointers are
7126  * equal again at which point it updates the rptr.
7127  */
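
/*
 * Editor's sketch of the rptr/wptr convention described above
 * (illustrative only; cik_get_ih_wptr() and cik_irq_process() below are
 * the real consumers).
 */
static inline bool __maybe_unused example_ih_ring_idle(struct radeon_device *rdev)
{
	/* the ring is idle when the read and write pointers match */
	return RREG32(IH_RB_RPTR) == RREG32(IH_RB_WPTR);
}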
7128 
7129 /**
7130  * cik_enable_interrupts - Enable the interrupt ring buffer
7131  *
7132  * @rdev: radeon_device pointer
7133  *
7134  * Enable the interrupt ring buffer (CIK).
7135  */
7136 static void cik_enable_interrupts(struct radeon_device *rdev)
7137 {
7138 	u32 ih_cntl = RREG32(IH_CNTL);
7139 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7140 
7141 	ih_cntl |= ENABLE_INTR;
7142 	ih_rb_cntl |= IH_RB_ENABLE;
7143 	WREG32(IH_CNTL, ih_cntl);
7144 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7145 	rdev->ih.enabled = true;
7146 }
7147 
7148 /**
7149  * cik_disable_interrupts - Disable the interrupt ring buffer
7150  *
7151  * @rdev: radeon_device pointer
7152  *
7153  * Disable the interrupt ring buffer (CIK).
7154  */
7155 static void cik_disable_interrupts(struct radeon_device *rdev)
7156 {
7157 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7158 	u32 ih_cntl = RREG32(IH_CNTL);
7159 
7160 	ih_rb_cntl &= ~IH_RB_ENABLE;
7161 	ih_cntl &= ~ENABLE_INTR;
7162 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7163 	WREG32(IH_CNTL, ih_cntl);
7164 	/* set rptr, wptr to 0 */
7165 	WREG32(IH_RB_RPTR, 0);
7166 	WREG32(IH_RB_WPTR, 0);
7167 	rdev->ih.enabled = false;
7168 	rdev->ih.rptr = 0;
7169 }
7170 
7171 /**
7172  * cik_disable_interrupt_state - Disable all interrupt sources
7173  *
7174  * @rdev: radeon_device pointer
7175  *
7176  * Clear all interrupt enable bits used by the driver (CIK).
7177  */
7178 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7179 {
7180 	u32 tmp;
7181 
7182 	/* gfx ring */
7183 	tmp = RREG32(CP_INT_CNTL_RING0) &
7184 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7185 	WREG32(CP_INT_CNTL_RING0, tmp);
7186 	/* sdma */
7187 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7188 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7189 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7190 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7191 	/* compute queues */
7192 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7193 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7194 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7195 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7196 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7197 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7198 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7199 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7200 	/* grbm */
7201 	WREG32(GRBM_INT_CNTL, 0);
7202 	/* vline/vblank, etc. */
7203 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7204 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7205 	if (rdev->num_crtc >= 4) {
7206 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7207 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7208 	}
7209 	if (rdev->num_crtc >= 6) {
7210 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7211 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7212 	}
7213 	/* pflip */
7214 	if (rdev->num_crtc >= 2) {
7215 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7216 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7217 	}
7218 	if (rdev->num_crtc >= 4) {
7219 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7220 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7221 	}
7222 	if (rdev->num_crtc >= 6) {
7223 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7224 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7225 	}
7226 
7227 	/* dac hotplug */
7228 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7229 
7230 	/* digital hotplug */
7231 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7232 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7233 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7234 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7235 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7236 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7237 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7238 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7239 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7240 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7241 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7242 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7243 
7244 }
7245 
7246 /**
7247  * cik_irq_init - init and enable the interrupt ring
7248  *
7249  * @rdev: radeon_device pointer
7250  *
7251  * Allocate a ring buffer for the interrupt controller,
7252  * enable the RLC, disable interrupts, set up the IH
7253  * ring buffer and enable it (CIK).
7254  * Called at device load and resume.
7255  * Returns 0 for success, errors for failure.
7256  */
7257 static int cik_irq_init(struct radeon_device *rdev)
7258 {
7259 	int ret = 0;
7260 	int rb_bufsz;
7261 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7262 
7263 	/* allocate ring */
7264 	ret = r600_ih_ring_alloc(rdev);
7265 	if (ret)
7266 		return ret;
7267 
7268 	/* disable irqs */
7269 	cik_disable_interrupts(rdev);
7270 
7271 	/* init rlc */
7272 	ret = cik_rlc_resume(rdev);
7273 	if (ret) {
7274 		r600_ih_ring_fini(rdev);
7275 		return ret;
7276 	}
7277 
7278 	/* setup interrupt control */
7279 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7280 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7281 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7282 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7283 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7284 	 */
7285 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7286 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7287 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7288 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7289 
7290 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7291 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7292 
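	/* the RB size field of IH_RB_CNTL is log2 of the ring size in
	 * dwords and starts at bit 1, hence the shift below
	 */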
7293 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7294 		      IH_WPTR_OVERFLOW_CLEAR |
7295 		      (rb_bufsz << 1));
7296 
7297 	if (rdev->wb.enabled)
7298 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7299 
7300 	/* set the writeback address whether it's enabled or not */
7301 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7302 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7303 
7304 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7305 
7306 	/* set rptr, wptr to 0 */
7307 	WREG32(IH_RB_RPTR, 0);
7308 	WREG32(IH_RB_WPTR, 0);
7309 
7310 	/* Default settings for IH_CNTL (disabled at first) */
7311 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7312 	/* RPTR_REARM only works if msi's are enabled */
7313 	if (rdev->msi_enabled)
7314 		ih_cntl |= RPTR_REARM;
7315 	WREG32(IH_CNTL, ih_cntl);
7316 
7317 	/* force the active interrupt state to all disabled */
7318 	cik_disable_interrupt_state(rdev);
7319 
7320 	pci_set_master(rdev->pdev);
7321 
7322 	/* enable irqs */
7323 	cik_enable_interrupts(rdev);
7324 
7325 	return ret;
7326 }
7327 
7328 /**
7329  * cik_irq_set - enable/disable interrupt sources
7330  *
7331  * @rdev: radeon_device pointer
7332  *
7333  * Enable interrupt sources on the GPU (vblanks, hpd,
7334  * etc.) (CIK).
7335  * Returns 0 for success, errors for failure.
7336  */
7337 int cik_irq_set(struct radeon_device *rdev)
7338 {
7339 	u32 cp_int_cntl;
7340 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7341 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7342 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7343 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7344 	u32 grbm_int_cntl = 0;
7345 	u32 dma_cntl, dma_cntl1;
7346 
7347 	if (!rdev->irq.installed) {
7348 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7349 		return -EINVAL;
7350 	}
7351 	/* don't enable anything if the ih is disabled */
7352 	if (!rdev->ih.enabled) {
7353 		cik_disable_interrupts(rdev);
7354 		/* force the active interrupt state to all disabled */
7355 		cik_disable_interrupt_state(rdev);
7356 		return 0;
7357 	}
7358 
7359 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7360 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7361 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7362 
7363 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7364 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7365 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7366 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7367 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7368 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7369 
7370 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7371 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7372 
7373 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7374 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7375 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7376 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7377 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7378 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7379 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7380 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7381 
7382 	/* enable CP interrupts on all rings */
7383 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7384 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7385 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7386 	}
7387 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7388 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7389 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7390 		if (ring->me == 1) {
7391 			switch (ring->pipe) {
7392 			case 0:
7393 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7394 				break;
7395 			case 1:
7396 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7397 				break;
7398 			case 2:
7399 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7400 				break;
7401 			case 3:
7402 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7403 				break;
7404 			default:
7405 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7406 				break;
7407 			}
7408 		} else if (ring->me == 2) {
7409 			switch (ring->pipe) {
7410 			case 0:
7411 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7412 				break;
7413 			case 1:
7414 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7415 				break;
7416 			case 2:
7417 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7418 				break;
7419 			case 3:
7420 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7421 				break;
7422 			default:
7423 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7424 				break;
7425 			}
7426 		} else {
7427 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7428 		}
7429 	}
7430 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7431 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7432 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7433 		if (ring->me == 1) {
7434 			switch (ring->pipe) {
7435 			case 0:
7436 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7437 				break;
7438 			case 1:
7439 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7440 				break;
7441 			case 2:
7442 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7443 				break;
7444 			case 3:
7445 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7446 				break;
7447 			default:
7448 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7449 				break;
7450 			}
7451 		} else if (ring->me == 2) {
7452 			switch (ring->pipe) {
7453 			case 0:
7454 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7455 				break;
7456 			case 1:
7457 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7458 				break;
7459 			case 2:
7460 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7461 				break;
7462 			case 3:
7463 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7464 				break;
7465 			default:
7466 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7467 				break;
7468 			}
7469 		} else {
7470 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7471 		}
7472 	}
7473 
7474 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7475 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7476 		dma_cntl |= TRAP_ENABLE;
7477 	}
7478 
7479 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7480 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7481 		dma_cntl1 |= TRAP_ENABLE;
7482 	}
7483 
7484 	if (rdev->irq.crtc_vblank_int[0] ||
7485 	    atomic_read(&rdev->irq.pflip[0])) {
7486 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7487 		crtc1 |= VBLANK_INTERRUPT_MASK;
7488 	}
7489 	if (rdev->irq.crtc_vblank_int[1] ||
7490 	    atomic_read(&rdev->irq.pflip[1])) {
7491 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7492 		crtc2 |= VBLANK_INTERRUPT_MASK;
7493 	}
7494 	if (rdev->irq.crtc_vblank_int[2] ||
7495 	    atomic_read(&rdev->irq.pflip[2])) {
7496 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7497 		crtc3 |= VBLANK_INTERRUPT_MASK;
7498 	}
7499 	if (rdev->irq.crtc_vblank_int[3] ||
7500 	    atomic_read(&rdev->irq.pflip[3])) {
7501 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7502 		crtc4 |= VBLANK_INTERRUPT_MASK;
7503 	}
7504 	if (rdev->irq.crtc_vblank_int[4] ||
7505 	    atomic_read(&rdev->irq.pflip[4])) {
7506 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7507 		crtc5 |= VBLANK_INTERRUPT_MASK;
7508 	}
7509 	if (rdev->irq.crtc_vblank_int[5] ||
7510 	    atomic_read(&rdev->irq.pflip[5])) {
7511 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7512 		crtc6 |= VBLANK_INTERRUPT_MASK;
7513 	}
7514 	if (rdev->irq.hpd[0]) {
7515 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7516 		hpd1 |= DC_HPDx_INT_EN;
7517 	}
7518 	if (rdev->irq.hpd[1]) {
7519 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7520 		hpd2 |= DC_HPDx_INT_EN;
7521 	}
7522 	if (rdev->irq.hpd[2]) {
7523 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7524 		hpd3 |= DC_HPDx_INT_EN;
7525 	}
7526 	if (rdev->irq.hpd[3]) {
7527 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7528 		hpd4 |= DC_HPDx_INT_EN;
7529 	}
7530 	if (rdev->irq.hpd[4]) {
7531 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7532 		hpd5 |= DC_HPDx_INT_EN;
7533 	}
7534 	if (rdev->irq.hpd[5]) {
7535 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7536 		hpd6 |= DC_HPDx_INT_EN;
7537 	}
7538 
7539 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7540 
7541 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7542 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7543 
7544 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7545 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7546 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7547 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7548 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7549 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7550 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7551 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7552 
7553 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7554 
7555 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7556 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7557 	if (rdev->num_crtc >= 4) {
7558 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7559 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7560 	}
7561 	if (rdev->num_crtc >= 6) {
7562 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7563 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7564 	}
7565 
7566 	if (rdev->num_crtc >= 2) {
7567 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7568 		       GRPH_PFLIP_INT_MASK);
7569 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7570 		       GRPH_PFLIP_INT_MASK);
7571 	}
7572 	if (rdev->num_crtc >= 4) {
7573 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7574 		       GRPH_PFLIP_INT_MASK);
7575 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7576 		       GRPH_PFLIP_INT_MASK);
7577 	}
7578 	if (rdev->num_crtc >= 6) {
7579 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7580 		       GRPH_PFLIP_INT_MASK);
7581 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7582 		       GRPH_PFLIP_INT_MASK);
7583 	}
7584 
7585 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7586 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7587 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7588 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7589 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7590 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7591 
7592 	/* posting read */
7593 	RREG32(SRBM_STATUS);
7594 
7595 	return 0;
7596 }
7597 
7598 /**
7599  * cik_irq_ack - ack interrupt sources
7600  *
7601  * @rdev: radeon_device pointer
7602  *
7603  * Ack interrupt sources on the GPU (vblanks, hpd,
7604  * etc.) (CIK).  Certain interrupt sources are sw
7605  * generated and do not require an explicit ack.
7606  */
7607 static inline void cik_irq_ack(struct radeon_device *rdev)
7608 {
7609 	u32 tmp;
7610 
7611 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7612 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7613 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7614 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7615 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7616 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7617 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7618 
7619 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7620 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7621 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7622 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7623 	if (rdev->num_crtc >= 4) {
7624 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7625 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7626 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7627 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7628 	}
7629 	if (rdev->num_crtc >= 6) {
7630 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7631 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7632 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7633 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7634 	}
7635 
7636 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7637 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7638 		       GRPH_PFLIP_INT_CLEAR);
7639 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7640 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7641 		       GRPH_PFLIP_INT_CLEAR);
7642 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7643 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7644 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7645 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7646 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7647 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7648 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7649 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7650 
7651 	if (rdev->num_crtc >= 4) {
7652 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7653 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7654 			       GRPH_PFLIP_INT_CLEAR);
7655 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7656 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7657 			       GRPH_PFLIP_INT_CLEAR);
7658 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7659 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7660 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7661 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7662 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7663 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7664 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7665 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7666 	}
7667 
7668 	if (rdev->num_crtc >= 6) {
7669 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7670 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7671 			       GRPH_PFLIP_INT_CLEAR);
7672 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7673 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7674 			       GRPH_PFLIP_INT_CLEAR);
7675 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7676 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7677 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7678 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7679 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7680 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7681 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7682 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7683 	}
7684 
7685 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7686 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7687 		tmp |= DC_HPDx_INT_ACK;
7688 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7689 	}
7690 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7691 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7692 		tmp |= DC_HPDx_INT_ACK;
7693 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7694 	}
7695 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7696 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7697 		tmp |= DC_HPDx_INT_ACK;
7698 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7699 	}
7700 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7701 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7702 		tmp |= DC_HPDx_INT_ACK;
7703 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7704 	}
7705 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7706 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7707 		tmp |= DC_HPDx_INT_ACK;
7708 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7709 	}
7710 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7711 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7712 		tmp |= DC_HPDx_INT_ACK;
7713 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7714 	}
7715 }
7716 
7717 /**
7718  * cik_irq_disable - disable interrupts
7719  *
7720  * @rdev: radeon_device pointer
7721  *
7722  * Disable interrupts on the hw (CIK).
7723  */
7724 static void cik_irq_disable(struct radeon_device *rdev)
7725 {
7726 	cik_disable_interrupts(rdev);
7727 	/* Wait and acknowledge irq */
7728 	mdelay(1);
7729 	cik_irq_ack(rdev);
7730 	cik_disable_interrupt_state(rdev);
7731 }
7732 
7733 /**
7734  * cik_irq_suspend - disable interrupts for suspend
7735  *
7736  * @rdev: radeon_device pointer
7737  *
7738  * Disable interrupts and stop the RLC (CIK).
7739  * Used for suspend.
7740  */
7741 static void cik_irq_suspend(struct radeon_device *rdev)
7742 {
7743 	cik_irq_disable(rdev);
7744 	cik_rlc_stop(rdev);
7745 }
7746 
7747 /**
7748  * cik_irq_fini - tear down interrupt support
7749  *
7750  * @rdev: radeon_device pointer
7751  *
7752  * Disable interrupts on the hw and free the IH ring
7753  * buffer (CIK).
7754  * Used for driver unload.
7755  */
7756 static void cik_irq_fini(struct radeon_device *rdev)
7757 {
7758 	cik_irq_suspend(rdev);
7759 	r600_ih_ring_fini(rdev);
7760 }
7761 
7762 /**
7763  * cik_get_ih_wptr - get the IH ring buffer wptr
7764  *
7765  * @rdev: radeon_device pointer
7766  *
7767  * Get the IH ring buffer wptr from either the register
7768  * or the writeback memory buffer (CIK).  Also check for
7769  * ring buffer overflow and deal with it.
7770  * Used by cik_irq_process().
7771  * Returns the value of the wptr.
7772  */
7773 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7774 {
7775 	u32 wptr, tmp;
7776 
7777 	if (rdev->wb.enabled)
7778 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7779 	else
7780 		wptr = RREG32(IH_RB_WPTR);
7781 
7782 	if (wptr & RB_OVERFLOW) {
7783 		wptr &= ~RB_OVERFLOW;
7784 		/* When a ring buffer overflow happens, start parsing interrupts
7785 		 * from the last vector that was not overwritten (wptr + 16).
7786 		 * Hopefully this allows us to catch up.
7787 		 */
7788 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7789 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7790 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7791 		tmp = RREG32(IH_RB_CNTL);
7792 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7793 		WREG32(IH_RB_CNTL, tmp);
7794 	}
7795 	return (wptr & rdev->ih.ptr_mask);
7796 }
7797 
7798 /* CIK IV Ring
7799  * Each IV ring entry is 128 bits:
7800  * [7:0]    - interrupt source id
7801  * [31:8]   - reserved
7802  * [59:32]  - interrupt source data
7803  * [63:60]  - reserved
7804  * [71:64]  - RINGID
7805  *            CP:
7806  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7807  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7808  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7809  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7810  *            PIPE_ID - ME0 0=3D
7811  *                    - ME1&2 compute dispatcher (4 pipes each)
7812  *            SDMA:
7813  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7814  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7815  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7816  * [79:72]  - VMID
7817  * [95:80]  - PASID
7818  * [127:96] - reserved
7819  */
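
/*
 * Editor's illustrative decode of one 4-dword IV ring entry per the
 * layout above (a sketch; the real walk is in cik_irq_process() below,
 * and the struct/function names here are hypothetical).
 */
struct cik_iv_entry {
	u32 src_id;	/* [7:0]   */
	u32 src_data;	/* [59:32] */
	u32 ring_id;	/* [71:64] */
	u32 vmid;	/* [79:72] */
	u32 pasid;	/* [95:80] */
};

static void __maybe_unused example_decode_iv(const __le32 *dw,
					     struct cik_iv_entry *e)
{
	e->src_id   = le32_to_cpu(dw[0]) & 0xff;
	e->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
	e->ring_id  = le32_to_cpu(dw[2]) & 0xff;
	e->vmid     = (le32_to_cpu(dw[2]) >> 8) & 0xff;
	e->pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff;
}
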
7820 /**
7821  * cik_irq_process - interrupt handler
7822  *
7823  * @rdev: radeon_device pointer
7824  *
7825  * Interrupt handler (CIK).  Walk the IH ring,
7826  * ack interrupts and schedule work to handle
7827  * interrupt events.
7828  * Returns irq process return code.
7829  */
7830 int cik_irq_process(struct radeon_device *rdev)
7831 {
7832 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7833 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7834 	u32 wptr;
7835 	u32 rptr;
7836 	u32 src_id, src_data, ring_id;
7837 	u8 me_id, pipe_id, queue_id;
7838 	u32 ring_index;
7839 	bool queue_hotplug = false;
7840 	bool queue_reset = false;
7841 	u32 addr, status, mc_client;
7842 	bool queue_thermal = false;
7843 
7844 	if (!rdev->ih.enabled || rdev->shutdown)
7845 		return IRQ_NONE;
7846 
7847 	wptr = cik_get_ih_wptr(rdev);
7848 
7849 restart_ih:
7850 	/* is somebody else already processing irqs? */
7851 	if (atomic_xchg(&rdev->ih.lock, 1))
7852 		return IRQ_NONE;
7853 
7854 	rptr = rdev->ih.rptr;
7855 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7856 
7857 	/* Order reading of wptr vs. reading of IH ring data */
7858 	rmb();
7859 
7860 	/* display interrupts */
7861 	cik_irq_ack(rdev);
7862 
7863 	while (rptr != wptr) {
7864 		/* wptr/rptr are in bytes! */
7865 		ring_index = rptr / 4;
7866 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7867 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7868 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7869 
7870 		switch (src_id) {
7871 		case 1: /* D1 vblank/vline */
7872 			switch (src_data) {
7873 			case 0: /* D1 vblank */
7874 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7875 					if (rdev->irq.crtc_vblank_int[0]) {
7876 						drm_handle_vblank(rdev->ddev, 0);
7877 						rdev->pm.vblank_sync = true;
7878 						wake_up(&rdev->irq.vblank_queue);
7879 					}
7880 					if (atomic_read(&rdev->irq.pflip[0]))
7881 						radeon_crtc_handle_vblank(rdev, 0);
7882 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7883 					DRM_DEBUG("IH: D1 vblank\n");
7884 				}
7885 				break;
7886 			case 1: /* D1 vline */
7887 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7888 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7889 					DRM_DEBUG("IH: D1 vline\n");
7890 				}
7891 				break;
7892 			default:
7893 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7894 				break;
7895 			}
7896 			break;
7897 		case 2: /* D2 vblank/vline */
7898 			switch (src_data) {
7899 			case 0: /* D2 vblank */
7900 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7901 					if (rdev->irq.crtc_vblank_int[1]) {
7902 						drm_handle_vblank(rdev->ddev, 1);
7903 						rdev->pm.vblank_sync = true;
7904 						wake_up(&rdev->irq.vblank_queue);
7905 					}
7906 					if (atomic_read(&rdev->irq.pflip[1]))
7907 						radeon_crtc_handle_vblank(rdev, 1);
7908 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7909 					DRM_DEBUG("IH: D2 vblank\n");
7910 				}
7911 				break;
7912 			case 1: /* D2 vline */
7913 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7914 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7915 					DRM_DEBUG("IH: D2 vline\n");
7916 				}
7917 				break;
7918 			default:
7919 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7920 				break;
7921 			}
7922 			break;
7923 		case 3: /* D3 vblank/vline */
7924 			switch (src_data) {
7925 			case 0: /* D3 vblank */
7926 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7927 					if (rdev->irq.crtc_vblank_int[2]) {
7928 						drm_handle_vblank(rdev->ddev, 2);
7929 						rdev->pm.vblank_sync = true;
7930 						wake_up(&rdev->irq.vblank_queue);
7931 					}
7932 					if (atomic_read(&rdev->irq.pflip[2]))
7933 						radeon_crtc_handle_vblank(rdev, 2);
7934 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7935 					DRM_DEBUG("IH: D3 vblank\n");
7936 				}
7937 				break;
7938 			case 1: /* D3 vline */
7939 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7940 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7941 					DRM_DEBUG("IH: D3 vline\n");
7942 				}
7943 				break;
7944 			default:
7945 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7946 				break;
7947 			}
7948 			break;
7949 		case 4: /* D4 vblank/vline */
7950 			switch (src_data) {
7951 			case 0: /* D4 vblank */
7952 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7953 					if (rdev->irq.crtc_vblank_int[3]) {
7954 						drm_handle_vblank(rdev->ddev, 3);
7955 						rdev->pm.vblank_sync = true;
7956 						wake_up(&rdev->irq.vblank_queue);
7957 					}
7958 					if (atomic_read(&rdev->irq.pflip[3]))
7959 						radeon_crtc_handle_vblank(rdev, 3);
7960 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7961 					DRM_DEBUG("IH: D4 vblank\n");
7962 				}
7963 				break;
7964 			case 1: /* D4 vline */
7965 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7966 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7967 					DRM_DEBUG("IH: D4 vline\n");
7968 				}
7969 				break;
7970 			default:
7971 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7972 				break;
7973 			}
7974 			break;
7975 		case 5: /* D5 vblank/vline */
7976 			switch (src_data) {
7977 			case 0: /* D5 vblank */
7978 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7979 					if (rdev->irq.crtc_vblank_int[4]) {
7980 						drm_handle_vblank(rdev->ddev, 4);
7981 						rdev->pm.vblank_sync = true;
7982 						wake_up(&rdev->irq.vblank_queue);
7983 					}
7984 					if (atomic_read(&rdev->irq.pflip[4]))
7985 						radeon_crtc_handle_vblank(rdev, 4);
7986 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7987 					DRM_DEBUG("IH: D5 vblank\n");
7988 				}
7989 				break;
7990 			case 1: /* D5 vline */
7991 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7992 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7993 					DRM_DEBUG("IH: D5 vline\n");
7994 				}
7995 				break;
7996 			default:
7997 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7998 				break;
7999 			}
8000 			break;
8001 		case 6: /* D6 vblank/vline */
8002 			switch (src_data) {
8003 			case 0: /* D6 vblank */
8004 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
8005 					if (rdev->irq.crtc_vblank_int[5]) {
8006 						drm_handle_vblank(rdev->ddev, 5);
8007 						rdev->pm.vblank_sync = true;
8008 						wake_up(&rdev->irq.vblank_queue);
8009 					}
8010 					if (atomic_read(&rdev->irq.pflip[5]))
8011 						radeon_crtc_handle_vblank(rdev, 5);
8012 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8013 					DRM_DEBUG("IH: D6 vblank\n");
8014 				}
8015 				break;
8016 			case 1: /* D6 vline */
8017 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
8018 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8019 					DRM_DEBUG("IH: D6 vline\n");
8020 				}
8021 				break;
8022 			default:
8023 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8024 				break;
8025 			}
8026 			break;
8027 		case 8: /* D1 page flip */
8028 		case 10: /* D2 page flip */
8029 		case 12: /* D3 page flip */
8030 		case 14: /* D4 page flip */
8031 		case 16: /* D5 page flip */
8032 		case 18: /* D6 page flip */
8033 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8034 			if (radeon_use_pflipirq > 0)
8035 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8036 			break;
8037 		case 42: /* HPD hotplug */
8038 			switch (src_data) {
8039 			case 0:
8040 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8041 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8042 					queue_hotplug = true;
8043 					DRM_DEBUG("IH: HPD1\n");
8044 				}
8045 				break;
8046 			case 1:
8047 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8048 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8049 					queue_hotplug = true;
8050 					DRM_DEBUG("IH: HPD2\n");
8051 				}
8052 				break;
8053 			case 2:
8054 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8055 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8056 					queue_hotplug = true;
8057 					DRM_DEBUG("IH: HPD3\n");
8058 				}
8059 				break;
8060 			case 3:
8061 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8062 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8063 					queue_hotplug = true;
8064 					DRM_DEBUG("IH: HPD4\n");
8065 				}
8066 				break;
8067 			case 4:
8068 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8069 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8070 					queue_hotplug = true;
8071 					DRM_DEBUG("IH: HPD5\n");
8072 				}
8073 				break;
8074 			case 5:
8075 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8076 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8077 					queue_hotplug = true;
8078 					DRM_DEBUG("IH: HPD6\n");
8079 				}
8080 				break;
8081 			default:
8082 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8083 				break;
8084 			}
8085 			break;
8086 		case 124: /* UVD */
8087 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8088 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8089 			break;
8090 		case 146:
8091 		case 147:
8092 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8093 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8094 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8095 			/* reset addr and status */
8096 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8097 			if (addr == 0x0 && status == 0x0)
8098 				break;
8099 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8100 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8101 				addr);
8102 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8103 				status);
8104 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8105 			break;
8106 		case 167: /* VCE */
8107 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8108 			switch (src_data) {
8109 			case 0:
8110 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8111 				break;
8112 			case 1:
8113 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8114 				break;
8115 			default:
8116 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8117 				break;
8118 			}
8119 			break;
8120 		case 176: /* GFX RB CP_INT */
8121 		case 177: /* GFX IB CP_INT */
8122 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8123 			break;
8124 		case 181: /* CP EOP event */
8125 			DRM_DEBUG("IH: CP EOP\n");
8126 			/* XXX check the bitfield order! */
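			/* RINGID per the IV ring layout above:
			 * [6:5] = ME_ID, [4:3] = PIPE_ID, [2:0] = QUEUE_ID
			 */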
8127 			me_id = (ring_id & 0x60) >> 5;
8128 			pipe_id = (ring_id & 0x18) >> 3;
8129 			queue_id = (ring_id & 0x7) >> 0;
8130 			switch (me_id) {
8131 			case 0:
8132 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8133 				break;
8134 			case 1:
8135 			case 2:
8136 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8137 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8138 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8139 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8140 				break;
8141 			}
8142 			break;
8143 		case 184: /* CP Privileged reg access */
8144 			DRM_ERROR("Illegal register access in command stream\n");
8145 			/* XXX check the bitfield order! */
8146 			me_id = (ring_id & 0x60) >> 5;
8147 			pipe_id = (ring_id & 0x18) >> 3;
8148 			queue_id = (ring_id & 0x7) >> 0;
8149 			switch (me_id) {
8150 			case 0:
8151 				/* This results in a full GPU reset, but all we need to do is soft
8152 				 * reset the CP for gfx
8153 				 */
8154 				queue_reset = true;
8155 				break;
8156 			case 1:
8157 				/* XXX compute */
8158 				queue_reset = true;
8159 				break;
8160 			case 2:
8161 				/* XXX compute */
8162 				queue_reset = true;
8163 				break;
8164 			}
8165 			break;
8166 		case 185: /* CP Privileged inst */
8167 			DRM_ERROR("Illegal instruction in command stream\n");
8168 			/* XXX check the bitfield order! */
8169 			me_id = (ring_id & 0x60) >> 5;
8170 			pipe_id = (ring_id & 0x18) >> 3;
8171 			queue_id = (ring_id & 0x7) >> 0;
8172 			switch (me_id) {
8173 			case 0:
8174 				/* This results in a full GPU reset, but all we need to do is soft
8175 				 * reset the CP for gfx
8176 				 */
8177 				queue_reset = true;
8178 				break;
8179 			case 1:
8180 				/* XXX compute */
8181 				queue_reset = true;
8182 				break;
8183 			case 2:
8184 				/* XXX compute */
8185 				queue_reset = true;
8186 				break;
8187 			}
8188 			break;
8189 		case 224: /* SDMA trap event */
8190 			/* XXX check the bitfield order! */
8191 			me_id = (ring_id & 0x3) >> 0;
8192 			queue_id = (ring_id & 0xc) >> 2;
8193 			DRM_DEBUG("IH: SDMA trap\n");
8194 			switch (me_id) {
8195 			case 0:
8196 				switch (queue_id) {
8197 				case 0:
8198 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8199 					break;
8200 				case 1:
8201 					/* XXX compute */
8202 					break;
8203 				case 2:
8204 					/* XXX compute */
8205 					break;
8206 				}
8207 				break;
8208 			case 1:
8209 				switch (queue_id) {
8210 				case 0:
8211 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8212 					break;
8213 				case 1:
8214 					/* XXX compute */
8215 					break;
8216 				case 2:
8217 					/* XXX compute */
8218 					break;
8219 				}
8220 				break;
8221 			}
8222 			break;
8223 		case 230: /* thermal low to high */
8224 			DRM_DEBUG("IH: thermal low to high\n");
8225 			rdev->pm.dpm.thermal.high_to_low = false;
8226 			queue_thermal = true;
8227 			break;
8228 		case 231: /* thermal high to low */
8229 			DRM_DEBUG("IH: thermal high to low\n");
8230 			rdev->pm.dpm.thermal.high_to_low = true;
8231 			queue_thermal = true;
8232 			break;
8233 		case 233: /* GUI IDLE */
8234 			DRM_DEBUG("IH: GUI idle\n");
8235 			break;
8236 		case 241: /* SDMA Privileged inst */
8237 		case 247: /* SDMA Privileged inst */
8238 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8239 			/* XXX check the bitfield order! */
8240 			me_id = (ring_id & 0x3) >> 0;
8241 			queue_id = (ring_id & 0xc) >> 2;
8242 			switch (me_id) {
8243 			case 0:
8244 				switch (queue_id) {
8245 				case 0:
8246 					queue_reset = true;
8247 					break;
8248 				case 1:
8249 					/* XXX compute */
8250 					queue_reset = true;
8251 					break;
8252 				case 2:
8253 					/* XXX compute */
8254 					queue_reset = true;
8255 					break;
8256 				}
8257 				break;
8258 			case 1:
8259 				switch (queue_id) {
8260 				case 0:
8261 					queue_reset = true;
8262 					break;
8263 				case 1:
8264 					/* XXX compute */
8265 					queue_reset = true;
8266 					break;
8267 				case 2:
8268 					/* XXX compute */
8269 					queue_reset = true;
8270 					break;
8271 				}
8272 				break;
8273 			}
8274 			break;
8275 		default:
8276 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8277 			break;
8278 		}
8279 
8280 		/* wptr/rptr are in bytes! */
8281 		rptr += 16;
8282 		rptr &= rdev->ih.ptr_mask;
8283 		WREG32(IH_RB_RPTR, rptr);
8284 	}
8285 	if (queue_hotplug)
8286 		schedule_work(&rdev->hotplug_work);
8287 	if (queue_reset) {
8288 		rdev->needs_reset = true;
8289 		wake_up_all(&rdev->fence_queue);
8290 	}
8291 	if (queue_thermal)
8292 		schedule_work(&rdev->pm.dpm.thermal.work);
8293 	rdev->ih.rptr = rptr;
8294 	atomic_set(&rdev->ih.lock, 0);
8295 
8296 	/* make sure wptr hasn't changed while processing */
8297 	wptr = cik_get_ih_wptr(rdev);
8298 	if (wptr != rptr)
8299 		goto restart_ih;
8300 
8301 	return IRQ_HANDLED;
8302 }
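
/* Editor's sketch: the CP ring_id decode repeated in the cases above,
 * factored into helpers for readability.  Illustrative only and compiled
 * out; the driver does not define or use these.  Note the SDMA cases use
 * a different layout ([1:0] = me, [3:2] = queue).
 */
#if 0
static inline u32 cik_ih_me_id(u32 ring_id)    { return (ring_id >> 5) & 0x3; }
static inline u32 cik_ih_pipe_id(u32 ring_id)  { return (ring_id >> 3) & 0x3; }
static inline u32 cik_ih_queue_id(u32 ring_id) { return ring_id & 0x7; }
#endif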
8303 
8304 /*
8305  * startup/shutdown callbacks
8306  */
8307 /**
8308  * cik_startup - program the asic to a functional state
8309  *
8310  * @rdev: radeon_device pointer
8311  *
8312  * Programs the asic to a functional state (CIK).
8313  * Called by cik_init() and cik_resume().
8314  * Returns 0 for success, error for failure.
8315  */
8316 static int cik_startup(struct radeon_device *rdev)
8317 {
8318 	struct radeon_ring *ring;
8319 	u32 nop;
8320 	int r;
8321 
8322 	/* enable pcie gen2/3 link */
8323 	cik_pcie_gen3_enable(rdev);
8324 	/* enable aspm */
8325 	cik_program_aspm(rdev);
8326 
8327 	/* scratch needs to be initialized before MC */
8328 	r = r600_vram_scratch_init(rdev);
8329 	if (r)
8330 		return r;
8331 
8332 	cik_mc_program(rdev);
8333 
8334 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8335 		r = ci_mc_load_microcode(rdev);
8336 		if (r) {
8337 			DRM_ERROR("Failed to load MC firmware!\n");
8338 			return r;
8339 		}
8340 	}
8341 
8342 	r = cik_pcie_gart_enable(rdev);
8343 	if (r)
8344 		return r;
8345 	cik_gpu_init(rdev);
8346 
8347 	/* allocate rlc buffers */
8348 	if (rdev->flags & RADEON_IS_IGP) {
8349 		if (rdev->family == CHIP_KAVERI) {
8350 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8351 			rdev->rlc.reg_list_size =
8352 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8353 		} else {
8354 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8355 			rdev->rlc.reg_list_size =
8356 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8357 		}
8358 	}
8359 	rdev->rlc.cs_data = ci_cs_data;
8360 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8361 	r = sumo_rlc_init(rdev);
8362 	if (r) {
8363 		DRM_ERROR("Failed to init rlc BOs!\n");
8364 		return r;
8365 	}
8366 
8367 	/* allocate wb buffer */
8368 	r = radeon_wb_init(rdev);
8369 	if (r)
8370 		return r;
8371 
8372 	/* allocate mec buffers */
8373 	r = cik_mec_init(rdev);
8374 	if (r) {
8375 		DRM_ERROR("Failed to init MEC BOs!\n");
8376 		return r;
8377 	}
8378 
8379 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8380 	if (r) {
8381 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8382 		return r;
8383 	}
8384 
8385 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8386 	if (r) {
8387 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8388 		return r;
8389 	}
8390 
8391 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8392 	if (r) {
8393 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8394 		return r;
8395 	}
8396 
8397 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8398 	if (r) {
8399 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8400 		return r;
8401 	}
8402 
8403 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8404 	if (r) {
8405 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8406 		return r;
8407 	}
8408 
8409 	r = radeon_uvd_resume(rdev);
8410 	if (!r) {
8411 		r = uvd_v4_2_resume(rdev);
8412 		if (!r) {
8413 			r = radeon_fence_driver_start_ring(rdev,
8414 							   R600_RING_TYPE_UVD_INDEX);
8415 			if (r)
8416 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8417 		}
8418 	}
8419 	if (r)
8420 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8421 
8422 	r = radeon_vce_resume(rdev);
8423 	if (!r) {
8424 		r = vce_v2_0_resume(rdev);
8425 		if (!r)
8426 			r = radeon_fence_driver_start_ring(rdev,
8427 							   TN_RING_TYPE_VCE1_INDEX);
8428 		if (!r)
8429 			r = radeon_fence_driver_start_ring(rdev,
8430 							   TN_RING_TYPE_VCE2_INDEX);
8431 	}
8432 	if (r) {
8433 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8434 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8435 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8436 	}
8437 
8438 	/* Enable IRQ */
8439 	if (!rdev->irq.installed) {
8440 		r = radeon_irq_kms_init(rdev);
8441 		if (r)
8442 			return r;
8443 	}
8444 
8445 	r = cik_irq_init(rdev);
8446 	if (r) {
8447 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8448 		radeon_irq_kms_fini(rdev);
8449 		return r;
8450 	}
8451 	cik_irq_set(rdev);
8452 
8453 	if (rdev->family == CHIP_HAWAII) {
8454 		if (rdev->new_fw)
8455 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8456 		else
8457 			nop = RADEON_CP_PACKET2;
8458 	} else {
8459 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8460 	}
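	/* Editor's note: the nop value chosen above is what radeon_ring_init()
	 * writes into ring padding.  A type-3 NOP with count 0x3FFF can cover
	 * a long run of padding in one packet; the type-2 RADEON_CP_PACKET2
	 * fallback for older Hawaii firmware is an editor's reading of the
	 * rdev->new_fw check, not AMD documentation.
	 */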
8461 
8462 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8463 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8464 			     nop);
8465 	if (r)
8466 		return r;
8467 
8468 	/* set up the compute queues */
8469 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8470 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8471 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8472 			     nop);
8473 	if (r)
8474 		return r;
8475 	ring->me = 1; /* first MEC */
8476 	ring->pipe = 0; /* first pipe */
8477 	ring->queue = 0; /* first queue */
8478 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8479 
8480 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8481 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8482 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8483 			     nop);
8484 	if (r)
8485 		return r;
8486 	/* dGPUs have only 1 MEC */
8487 	ring->me = 1; /* first MEC */
8488 	ring->pipe = 0; /* first pipe */
8489 	ring->queue = 1; /* second queue */
8490 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8491 
8492 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8493 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8494 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8495 	if (r)
8496 		return r;
8497 
8498 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8499 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8500 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8501 	if (r)
8502 		return r;
8503 
8504 	r = cik_cp_resume(rdev);
8505 	if (r)
8506 		return r;
8507 
8508 	r = cik_sdma_resume(rdev);
8509 	if (r)
8510 		return r;
8511 
8512 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8513 	if (ring->ring_size) {
8514 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8515 				     RADEON_CP_PACKET2);
8516 		if (!r)
8517 			r = uvd_v1_0_init(rdev);
8518 		if (r)
8519 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8520 	}
8521 
8522 	r = -ENOENT;
8523 
8524 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8525 	if (ring->ring_size)
8526 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8527 				     VCE_CMD_NO_OP);
8528 
8529 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8530 	if (ring->ring_size)
8531 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8532 				     VCE_CMD_NO_OP);
8533 
8534 	if (!r)
8535 		r = vce_v1_0_init(rdev);
8536 	else if (r != -ENOENT)
8537 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8538 
8539 	r = radeon_ib_pool_init(rdev);
8540 	if (r) {
8541 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8542 		return r;
8543 	}
8544 
8545 	r = radeon_vm_manager_init(rdev);
8546 	if (r) {
8547 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8548 		return r;
8549 	}
8550 
8551 	r = dce6_audio_init(rdev);
8552 	if (r)
8553 		return r;
8554 
8555 	return 0;
8556 }
8557 
8558 /**
8559  * cik_resume - resume the asic to a functional state
8560  *
8561  * @rdev: radeon_device pointer
8562  *
8563  * Programs the asic to a functional state (CIK).
8564  * Called at resume.
8565  * Returns 0 for success, error for failure.
8566  */
8567 int cik_resume(struct radeon_device *rdev)
8568 {
8569 	int r;
8570 
8571 	/* post card */
8572 	atom_asic_init(rdev->mode_info.atom_context);
8573 
8574 	/* init golden registers */
8575 	cik_init_golden_registers(rdev);
8576 
8577 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8578 		radeon_pm_resume(rdev);
8579 
8580 	rdev->accel_working = true;
8581 	r = cik_startup(rdev);
8582 	if (r) {
8583 		DRM_ERROR("cik startup failed on resume\n");
8584 		rdev->accel_working = false;
8585 		return r;
8586 	}
8587 
8588 	return r;
8589 
8590 }
8591 
8592 /**
8593  * cik_suspend - suspend the asic
8594  *
8595  * @rdev: radeon_device pointer
8596  *
8597  * Bring the chip into a state suitable for suspend (CIK).
8598  * Called at suspend.
8599  * Returns 0 for success.
8600  */
8601 int cik_suspend(struct radeon_device *rdev)
8602 {
8603 	radeon_pm_suspend(rdev);
8604 	dce6_audio_fini(rdev);
8605 	radeon_vm_manager_fini(rdev);
8606 	cik_cp_enable(rdev, false);
8607 	cik_sdma_enable(rdev, false);
8608 	uvd_v1_0_fini(rdev);
8609 	radeon_uvd_suspend(rdev);
8610 	radeon_vce_suspend(rdev);
8611 	cik_fini_pg(rdev);
8612 	cik_fini_cg(rdev);
8613 	cik_irq_suspend(rdev);
8614 	radeon_wb_disable(rdev);
8615 	cik_pcie_gart_disable(rdev);
8616 	return 0;
8617 }
8618 
8619 /* The plan is to move initialization into this function and use
8620  * helper functions so that radeon_device_init does little more
8621  * than call asic-specific functions. This should also
8622  * allow us to remove a number of callbacks,
8623  * like vram_info.
8624  */
8625 /**
8626  * cik_init - asic specific driver and hw init
8627  *
8628  * @rdev: radeon_device pointer
8629  *
8630  * Setup asic specific driver variables and program the hw
8631  * to a functional state (CIK).
8632  * Called at driver startup.
8633  * Returns 0 for success, errors for failure.
8634  */
8635 int cik_init(struct radeon_device *rdev)
8636 {
8637 	struct radeon_ring *ring;
8638 	int r;
8639 
8640 	/* Read BIOS */
8641 	if (!radeon_get_bios(rdev)) {
8642 		if (ASIC_IS_AVIVO(rdev))
8643 			return -EINVAL;
8644 	}
8645 	/* Must be an ATOMBIOS */
8646 	if (!rdev->is_atom_bios) {
8647 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8648 		return -EINVAL;
8649 	}
8650 	r = radeon_atombios_init(rdev);
8651 	if (r)
8652 		return r;
8653 
8654 	/* Post card if necessary */
8655 	if (!radeon_card_posted(rdev)) {
8656 		if (!rdev->bios) {
8657 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8658 			return -EINVAL;
8659 		}
8660 		DRM_INFO("GPU not posted. posting now...\n");
8661 		atom_asic_init(rdev->mode_info.atom_context);
8662 	}
8663 	/* init golden registers */
8664 	cik_init_golden_registers(rdev);
8665 	/* Initialize scratch registers */
8666 	cik_scratch_init(rdev);
8667 	/* Initialize surface registers */
8668 	radeon_surface_init(rdev);
8669 	/* Initialize clocks */
8670 	radeon_get_clock_info(rdev->ddev);
8671 
8672 	/* Fence driver */
8673 	r = radeon_fence_driver_init(rdev);
8674 	if (r)
8675 		return r;
8676 
8677 	/* initialize memory controller */
8678 	r = cik_mc_init(rdev);
8679 	if (r)
8680 		return r;
8681 	/* Memory manager */
8682 	r = radeon_bo_init(rdev);
8683 	if (r)
8684 		return r;
8685 
8686 	if (rdev->flags & RADEON_IS_IGP) {
8687 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8688 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8689 			r = cik_init_microcode(rdev);
8690 			if (r) {
8691 				DRM_ERROR("Failed to load firmware!\n");
8692 				return r;
8693 			}
8694 		}
8695 	} else {
8696 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8697 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8698 		    !rdev->mc_fw) {
8699 			r = cik_init_microcode(rdev);
8700 			if (r) {
8701 				DRM_ERROR("Failed to load firmware!\n");
8702 				return r;
8703 			}
8704 		}
8705 	}
8706 
8707 	/* Initialize power management */
8708 	radeon_pm_init(rdev);
8709 
8710 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8711 	ring->ring_obj = NULL;
8712 	r600_ring_init(rdev, ring, 1024 * 1024);
8713 
8714 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8715 	ring->ring_obj = NULL;
8716 	r600_ring_init(rdev, ring, 1024 * 1024);
8717 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8718 	if (r)
8719 		return r;
8720 
8721 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8722 	ring->ring_obj = NULL;
8723 	r600_ring_init(rdev, ring, 1024 * 1024);
8724 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8725 	if (r)
8726 		return r;
8727 
8728 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8729 	ring->ring_obj = NULL;
8730 	r600_ring_init(rdev, ring, 256 * 1024);
8731 
8732 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8733 	ring->ring_obj = NULL;
8734 	r600_ring_init(rdev, ring, 256 * 1024);
8735 
8736 	r = radeon_uvd_init(rdev);
8737 	if (!r) {
8738 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8739 		ring->ring_obj = NULL;
8740 		r600_ring_init(rdev, ring, 4096);
8741 	}
8742 
8743 	r = radeon_vce_init(rdev);
8744 	if (!r) {
8745 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8746 		ring->ring_obj = NULL;
8747 		r600_ring_init(rdev, ring, 4096);
8748 
8749 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8750 		ring->ring_obj = NULL;
8751 		r600_ring_init(rdev, ring, 4096);
8752 	}
8753 
8754 	rdev->ih.ring_obj = NULL;
8755 	r600_ih_ring_init(rdev, 64 * 1024);
8756 
8757 	r = r600_pcie_gart_init(rdev);
8758 	if (r)
8759 		return r;
8760 
8761 	rdev->accel_working = true;
8762 	r = cik_startup(rdev);
8763 	if (r) {
8764 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8765 		cik_cp_fini(rdev);
8766 		cik_sdma_fini(rdev);
8767 		cik_irq_fini(rdev);
8768 		sumo_rlc_fini(rdev);
8769 		cik_mec_fini(rdev);
8770 		radeon_wb_fini(rdev);
8771 		radeon_ib_pool_fini(rdev);
8772 		radeon_vm_manager_fini(rdev);
8773 		radeon_irq_kms_fini(rdev);
8774 		cik_pcie_gart_fini(rdev);
8775 		rdev->accel_working = false;
8776 	}
8777 
8778 	/* Don't start up if the MC ucode is missing.
8779 	 * The default clocks and voltages before the MC ucode
8780 	 * is loaded are not sufficient for advanced operations.
8781 	 */
8782 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8783 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8784 		return -EINVAL;
8785 	}
8786 
8787 	return 0;
8788 }
8789 
8790 /**
8791  * cik_fini - asic specific driver and hw fini
8792  *
8793  * @rdev: radeon_device pointer
8794  *
8795  * Tear down the asic specific driver variables and program the hw
8796  * to an idle state (CIK).
8797  * Called at driver unload.
8798  */
8799 void cik_fini(struct radeon_device *rdev)
8800 {
8801 	radeon_pm_fini(rdev);
8802 	cik_cp_fini(rdev);
8803 	cik_sdma_fini(rdev);
8804 	cik_fini_pg(rdev);
8805 	cik_fini_cg(rdev);
8806 	cik_irq_fini(rdev);
8807 	sumo_rlc_fini(rdev);
8808 	cik_mec_fini(rdev);
8809 	radeon_wb_fini(rdev);
8810 	radeon_vm_manager_fini(rdev);
8811 	radeon_ib_pool_fini(rdev);
8812 	radeon_irq_kms_fini(rdev);
8813 	uvd_v1_0_fini(rdev);
8814 	radeon_uvd_fini(rdev);
8815 	radeon_vce_fini(rdev);
8816 	cik_pcie_gart_fini(rdev);
8817 	r600_vram_scratch_fini(rdev);
8818 	radeon_gem_fini(rdev);
8819 	radeon_fence_driver_fini(rdev);
8820 	radeon_bo_fini(rdev);
8821 	radeon_atombios_fini(rdev);
8822 	kfree(rdev->bios);
8823 	rdev->bios = NULL;
8824 }
8825 
8826 void dce8_program_fmt(struct drm_encoder *encoder)
8827 {
8828 	struct drm_device *dev = encoder->dev;
8829 	struct radeon_device *rdev = dev->dev_private;
8830 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8831 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8832 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8833 	int bpc = 0;
8834 	u32 tmp = 0;
8835 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8836 
8837 	if (connector) {
8838 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8839 		bpc = radeon_get_monitor_bpc(connector);
8840 		dither = radeon_connector->dither;
8841 	}
8842 
8843 	/* LVDS/eDP FMT is set up by atom */
8844 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8845 		return;
8846 
8847 	/* not needed for analog */
8848 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8849 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8850 		return;
8851 
8852 	if (bpc == 0)
8853 		return;
8854 
8855 	switch (bpc) {
8856 	case 6:
8857 		if (dither == RADEON_FMT_DITHER_ENABLE)
8858 			/* XXX sort out optimal dither settings */
8859 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8860 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8861 		else
8862 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8863 		break;
8864 	case 8:
8865 		if (dither == RADEON_FMT_DITHER_ENABLE)
8866 			/* XXX sort out optimal dither settings */
8867 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8868 				FMT_RGB_RANDOM_ENABLE |
8869 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8870 		else
8871 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8872 		break;
8873 	case 10:
8874 		if (dither == RADEON_FMT_DITHER_ENABLE)
8875 			/* XXX sort out optimal dither settings */
8876 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8877 				FMT_RGB_RANDOM_ENABLE |
8878 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8879 		else
8880 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8881 		break;
8882 	default:
8883 		/* not needed */
8884 		break;
8885 	}
8886 
8887 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8888 }
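
/* Editor's sketch: the bpc -> FMT depth mapping used above, pulled out as
 * a helper.  Depth 0/1/2 corresponds to 6/8/10 bpc truncation or spatial
 * dither.  Illustrative only and compiled out; the driver keeps this
 * inline in the switch above.
 */
#if 0
static u32 dce8_fmt_depth(int bpc)
{
	switch (bpc) {
	case 6:
		return 0;
	case 8:
		return 1;
	case 10:
		return 2;
	default:
		return 0; /* no FMT programming needed */
	}
}
#endif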
8889 
8890 /* display watermark setup */
8891 /**
8892  * dce8_line_buffer_adjust - Set up the line buffer
8893  *
8894  * @rdev: radeon_device pointer
8895  * @radeon_crtc: the selected display controller
8896  * @mode: the current display mode on the selected display
8897  * controller
8898  *
8899  * Set up the line buffer allocation for
8900  * the selected display controller (CIK).
8901  * Returns the line buffer size in pixels.
8902  */
8903 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8904 				   struct radeon_crtc *radeon_crtc,
8905 				   struct drm_display_mode *mode)
8906 {
8907 	u32 tmp, buffer_alloc, i;
8908 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8909 	/*
8910 	 * Line Buffer Setup
8911 	 * There are 6 line buffers, one for each display controller.
8912 	 * There are 3 partitions per LB. Select the number of partitions
8913 	 * to enable based on the display width.  For display widths larger
8914 	 * than 4096, you need to use 2 display controllers and combine
8915 	 * them using the stereo blender.
8916 	 */
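	/* Editor's worked example: a 1920-wide mode is not < 1920, so it
	 * takes the < 2560 branch (tmp = 2, buffer_alloc = 2) and the
	 * function reports 2560 * 2 pixels of line buffer; a 2560-wide mode
	 * falls through to the < 4096 branch (tmp = 0, buffer_alloc = 4 on
	 * dGPUs) and reports 4096 * 2 pixels.
	 */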
8917 	if (radeon_crtc->base.enabled && mode) {
8918 		if (mode->crtc_hdisplay < 1920) {
8919 			tmp = 1;
8920 			buffer_alloc = 2;
8921 		} else if (mode->crtc_hdisplay < 2560) {
8922 			tmp = 2;
8923 			buffer_alloc = 2;
8924 		} else if (mode->crtc_hdisplay < 4096) {
8925 			tmp = 0;
8926 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8927 		} else {
8928 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8929 			tmp = 0;
8930 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8931 		}
8932 	} else {
8933 		tmp = 1;
8934 		buffer_alloc = 0;
8935 	}
8936 
8937 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8938 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8939 
8940 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8941 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8942 	for (i = 0; i < rdev->usec_timeout; i++) {
8943 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8944 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8945 			break;
8946 		udelay(1);
8947 	}
8948 
8949 	if (radeon_crtc->base.enabled && mode) {
8950 		switch (tmp) {
8951 		case 0:
8952 		default:
8953 			return 4096 * 2;
8954 		case 1:
8955 			return 1920 * 2;
8956 		case 2:
8957 			return 2560 * 2;
8958 		}
8959 	}
8960 
8961 	/* controller not enabled, so no lb used */
8962 	return 0;
8963 }
8964 
8965 /**
8966  * cik_get_number_of_dram_channels - get the number of dram channels
8967  *
8968  * @rdev: radeon_device pointer
8969  *
8970  * Look up the number of video ram channels (CIK).
8971  * Used for display watermark bandwidth calculations
8972  * Returns the number of dram channels
8973  */
8974 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8975 {
8976 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8977 
8978 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8979 	case 0:
8980 	default:
8981 		return 1;
8982 	case 1:
8983 		return 2;
8984 	case 2:
8985 		return 4;
8986 	case 3:
8987 		return 8;
8988 	case 4:
8989 		return 3;
8990 	case 5:
8991 		return 6;
8992 	case 6:
8993 		return 10;
8994 	case 7:
8995 		return 12;
8996 	case 8:
8997 		return 16;
8998 	}
8999 }
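
/* Editor's note: the values above decode MC_SHARED_CHMAP's NOOFCHAN
 * field; e.g. encoding 3 means 8 channels, i.e. a 256-bit bus at 32 bits
 * per channel (the per-channel width is inferred from the "* 4" bytes in
 * the bandwidth code below, not from documentation).
 */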
9000 
9001 struct dce8_wm_params {
9002 	u32 dram_channels; /* number of dram channels */
9003 	u32 yclk;          /* bandwidth per dram data pin in kHz */
9004 	u32 sclk;          /* engine clock in kHz */
9005 	u32 disp_clk;      /* display clock in kHz */
9006 	u32 src_width;     /* viewport width */
9007 	u32 active_time;   /* active display time in ns */
9008 	u32 blank_time;    /* blank time in ns */
9009 	bool interlaced;    /* mode is interlaced */
9010 	fixed20_12 vsc;    /* vertical scale ratio */
9011 	u32 num_heads;     /* number of active crtcs */
9012 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9013 	u32 lb_size;       /* line buffer allocated to pipe */
9014 	u32 vtaps;         /* vertical scaler taps */
9015 };
9016 
9017 /**
9018  * dce8_dram_bandwidth - get the dram bandwidth
9019  *
9020  * @wm: watermark calculation data
9021  *
9022  * Calculate the raw dram bandwidth (CIK).
9023  * Used for display watermark bandwidth calculations
9024  * Returns the dram bandwidth in MBytes/s
9025  */
9026 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9027 {
9028 	/* Calculate raw DRAM Bandwidth */
9029 	fixed20_12 dram_efficiency; /* 0.7 */
9030 	fixed20_12 yclk, dram_channels, bandwidth;
9031 	fixed20_12 a;
9032 
9033 	a.full = dfixed_const(1000);
9034 	yclk.full = dfixed_const(wm->yclk);
9035 	yclk.full = dfixed_div(yclk, a);
9036 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9037 	a.full = dfixed_const(10);
9038 	dram_efficiency.full = dfixed_const(7);
9039 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9040 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9041 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9042 
9043 	return dfixed_trunc(bandwidth);
9044 }
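
/* Editor's worked example for dce8_dram_bandwidth(), with hypothetical
 * numbers: yclk = 1000000 (kHz) and 4 channels give
 * (1000000 / 1000) * (4 * 4 bytes) * 0.7 = 11200 MBytes/s.
 */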
9045 
9046 /**
9047  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9048  *
9049  * @wm: watermark calculation data
9050  *
9051  * Calculate the dram bandwidth used for display (CIK).
9052  * Used for display watermark bandwidth calculations
9053  * Returns the dram bandwidth for display in MBytes/s
9054  */
9055 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9056 {
9057 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9058 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9059 	fixed20_12 yclk, dram_channels, bandwidth;
9060 	fixed20_12 a;
9061 
9062 	a.full = dfixed_const(1000);
9063 	yclk.full = dfixed_const(wm->yclk);
9064 	yclk.full = dfixed_div(yclk, a);
9065 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9066 	a.full = dfixed_const(10);
9067 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9068 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9069 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9070 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9071 
9072 	return dfixed_trunc(bandwidth);
9073 }
9074 
9075 /**
9076  * dce8_data_return_bandwidth - get the data return bandwidth
9077  *
9078  * @wm: watermark calculation data
9079  *
9080  * Calculate the data return bandwidth used for display (CIK).
9081  * Used for display watermark bandwidth calculations
9082  * Returns the data return bandwidth in MBytes/s
9083  */
9084 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9085 {
9086 	/* Calculate the display Data return Bandwidth */
9087 	fixed20_12 return_efficiency; /* 0.8 */
9088 	fixed20_12 sclk, bandwidth;
9089 	fixed20_12 a;
9090 
9091 	a.full = dfixed_const(1000);
9092 	sclk.full = dfixed_const(wm->sclk);
9093 	sclk.full = dfixed_div(sclk, a);
9094 	a.full = dfixed_const(10);
9095 	return_efficiency.full = dfixed_const(8);
9096 	return_efficiency.full = dfixed_div(return_efficiency, a);
9097 	a.full = dfixed_const(32);
9098 	bandwidth.full = dfixed_mul(a, sclk);
9099 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9100 
9101 	return dfixed_trunc(bandwidth);
9102 }
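
/* Editor's worked example for dce8_data_return_bandwidth(), with a
 * hypothetical sclk = 800000 (kHz): (800000 / 1000) * 32 bytes * 0.8 =
 * 20480 MBytes/s.
 */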
9103 
9104 /**
9105  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9106  *
9107  * @wm: watermark calculation data
9108  *
9109  * Calculate the dmif bandwidth used for display (CIK).
9110  * Used for display watermark bandwidth calculations
9111  * Returns the dmif bandwidth in MBytes/s
9112  */
9113 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9114 {
9115 	/* Calculate the DMIF Request Bandwidth */
9116 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9117 	fixed20_12 disp_clk, bandwidth;
9118 	fixed20_12 a, b;
9119 
9120 	a.full = dfixed_const(1000);
9121 	disp_clk.full = dfixed_const(wm->disp_clk);
9122 	disp_clk.full = dfixed_div(disp_clk, a);
9123 	a.full = dfixed_const(32);
9124 	b.full = dfixed_mul(a, disp_clk);
9125 
9126 	a.full = dfixed_const(10);
9127 	disp_clk_request_efficiency.full = dfixed_const(8);
9128 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9129 
9130 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9131 
9132 	return dfixed_trunc(bandwidth);
9133 }
9134 
9135 /**
9136  * dce8_available_bandwidth - get the min available bandwidth
9137  *
9138  * @wm: watermark calculation data
9139  *
9140  * Calculate the min available bandwidth used for display (CIK).
9141  * Used for display watermark bandwidth calculations
9142  * Returns the min available bandwidth in MBytes/s
9143  */
9144 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9145 {
9146 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9147 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9148 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9149 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9150 
9151 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9152 }
9153 
9154 /**
9155  * dce8_average_bandwidth - get the average available bandwidth
9156  *
9157  * @wm: watermark calculation data
9158  *
9159  * Calculate the average available bandwidth used for display (CIK).
9160  * Used for display watermark bandwidth calculations
9161  * Returns the average available bandwidth in MBytes/s
9162  */
9163 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9164 {
9165 	/* Calculate the display mode Average Bandwidth
9166 	 * DisplayMode should contain the source and destination dimensions,
9167 	 * timing, etc.
9168 	 */
9169 	fixed20_12 bpp;
9170 	fixed20_12 line_time;
9171 	fixed20_12 src_width;
9172 	fixed20_12 bandwidth;
9173 	fixed20_12 a;
9174 
9175 	a.full = dfixed_const(1000);
9176 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9177 	line_time.full = dfixed_div(line_time, a);
9178 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9179 	src_width.full = dfixed_const(wm->src_width);
9180 	bandwidth.full = dfixed_mul(src_width, bpp);
9181 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9182 	bandwidth.full = dfixed_div(bandwidth, line_time);
9183 
9184 	return dfixed_trunc(bandwidth);
9185 }
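
/* Editor's worked example for dce8_average_bandwidth(), hypothetical
 * numbers: src_width = 1920, 4 bytes/pixel, vsc = 1.0 and a line time of
 * 13200 ns give 1920 * 4 / 13.2 us = ~581 MBytes/s.
 */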
9186 
9187 /**
9188  * dce8_latency_watermark - get the latency watermark
9189  *
9190  * @wm: watermark calculation data
9191  *
9192  * Calculate the latency watermark (CIK).
9193  * Used for display watermark bandwidth calculations
9194  * Returns the latency watermark in ns
9195  */
9196 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9197 {
9198 	/* First calculate the latency in ns */
9199 	u32 mc_latency = 2000; /* 2000 ns. */
9200 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9201 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9202 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9203 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9204 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9205 		(wm->num_heads * cursor_line_pair_return_time);
9206 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9207 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9208 	u32 tmp, dmif_size = 12288;
9209 	fixed20_12 a, b, c;
9210 
9211 	if (wm->num_heads == 0)
9212 		return 0;
9213 
9214 	a.full = dfixed_const(2);
9215 	b.full = dfixed_const(1);
9216 	if ((wm->vsc.full > a.full) ||
9217 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9218 	    (wm->vtaps >= 5) ||
9219 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9220 		max_src_lines_per_dst_line = 4;
9221 	else
9222 		max_src_lines_per_dst_line = 2;
9223 
9224 	a.full = dfixed_const(available_bandwidth);
9225 	b.full = dfixed_const(wm->num_heads);
9226 	a.full = dfixed_div(a, b);
9227 
9228 	b.full = dfixed_const(mc_latency + 512);
9229 	c.full = dfixed_const(wm->disp_clk);
9230 	b.full = dfixed_div(b, c);
9231 
9232 	c.full = dfixed_const(dmif_size);
9233 	b.full = dfixed_div(c, b);
9234 
9235 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9236 
9237 	b.full = dfixed_const(1000);
9238 	c.full = dfixed_const(wm->disp_clk);
9239 	b.full = dfixed_div(c, b);
9240 	c.full = dfixed_const(wm->bytes_per_pixel);
9241 	b.full = dfixed_mul(b, c);
9242 
9243 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9244 
9245 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9246 	b.full = dfixed_const(1000);
9247 	c.full = dfixed_const(lb_fill_bw);
9248 	b.full = dfixed_div(c, b);
9249 	a.full = dfixed_div(a, b);
9250 	line_fill_time = dfixed_trunc(a);
9251 
9252 	if (line_fill_time < wm->active_time)
9253 		return latency;
9254 	else
9255 		return latency + (line_fill_time - wm->active_time);
9256 
9257 }
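
/* Editor's summary of dce8_latency_watermark(), inferred from the code:
 * latency = mc_latency (2000 ns) + dc pipe latency (40000000 / disp_clk)
 * + worst-case chunk and cursor-line return time for the other heads; if
 * the line buffer fill rate cannot refill a line within the active time,
 * the shortfall is added on top.
 */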
9258 
9259 /**
9260  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9261  * average and available dram bandwidth
9262  *
9263  * @wm: watermark calculation data
9264  *
9265  * Check if the display average bandwidth fits in the display
9266  * dram bandwidth (CIK).
9267  * Used for display watermark bandwidth calculations
9268  * Returns true if the display fits, false if not.
9269  */
9270 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9271 {
9272 	if (dce8_average_bandwidth(wm) <=
9273 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9274 		return true;
9275 	else
9276 		return false;
9277 }
9278 
9279 /**
9280  * dce8_average_bandwidth_vs_available_bandwidth - check
9281  * average and available bandwidth
9282  *
9283  * @wm: watermark calculation data
9284  *
9285  * Check if the display average bandwidth fits in the display
9286  * available bandwidth (CIK).
9287  * Used for display watermark bandwidth calculations
9288  * Returns true if the display fits, false if not.
9289  */
9290 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9291 {
9292 	if (dce8_average_bandwidth(wm) <=
9293 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9294 		return true;
9295 	else
9296 		return false;
9297 }
9298 
9299 /**
9300  * dce8_check_latency_hiding - check latency hiding
9301  *
9302  * @wm: watermark calculation data
9303  *
9304  * Check latency hiding (CIK).
9305  * Used for display watermark bandwidth calculations
9306  * Returns true if the display fits, false if not.
9307  */
9308 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9309 {
9310 	u32 lb_partitions = wm->lb_size / wm->src_width;
9311 	u32 line_time = wm->active_time + wm->blank_time;
9312 	u32 latency_tolerant_lines;
9313 	u32 latency_hiding;
9314 	fixed20_12 a;
9315 
9316 	a.full = dfixed_const(1);
9317 	if (wm->vsc.full > a.full)
9318 		latency_tolerant_lines = 1;
9319 	else {
9320 		if (lb_partitions <= (wm->vtaps + 1))
9321 			latency_tolerant_lines = 1;
9322 		else
9323 			latency_tolerant_lines = 2;
9324 	}
9325 
9326 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9327 
9328 	if (dce8_latency_watermark(wm) <= latency_hiding)
9329 		return true;
9330 	else
9331 		return false;
9332 }
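
/* Editor's worked example for dce8_check_latency_hiding(), hypothetical
 * numbers: lb_size = 5120 and src_width = 1920 give lb_partitions = 2;
 * with vsc <= 1 and vtaps = 1 that means latency_tolerant_lines = 1, so
 * latency_hiding = line_time + blank_time and the watermark must fit in
 * that window.
 */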
9333 
9334 /**
9335  * dce8_program_watermarks - program display watermarks
9336  *
9337  * @rdev: radeon_device pointer
9338  * @radeon_crtc: the selected display controller
9339  * @lb_size: line buffer size
9340  * @num_heads: number of display controllers in use
9341  *
9342  * Calculate and program the display watermarks for the
9343  * selected display controller (CIK).
9344  */
9345 static void dce8_program_watermarks(struct radeon_device *rdev,
9346 				    struct radeon_crtc *radeon_crtc,
9347 				    u32 lb_size, u32 num_heads)
9348 {
9349 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9350 	struct dce8_wm_params wm_low, wm_high;
9351 	u32 pixel_period;
9352 	u32 line_time = 0;
9353 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9354 	u32 tmp, wm_mask;
9355 
9356 	if (radeon_crtc->base.enabled && num_heads && mode) {
9357 		pixel_period = 1000000 / (u32)mode->clock;
9358 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
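		/* Editor's worked example: for a hypothetical 1080p60 mode
		 * (clock = 148500 kHz, crtc_htotal = 2200), pixel_period =
		 * 1000000 / 148500 = 6 ns (integer math) and line_time =
		 * 2200 * 6 = 13200 ns.
		 */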
9359 
9360 		/* watermark for high clocks */
9361 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9362 		    rdev->pm.dpm_enabled) {
9363 			wm_high.yclk =
9364 				radeon_dpm_get_mclk(rdev, false) * 10;
9365 			wm_high.sclk =
9366 				radeon_dpm_get_sclk(rdev, false) * 10;
9367 		} else {
9368 			wm_high.yclk = rdev->pm.current_mclk * 10;
9369 			wm_high.sclk = rdev->pm.current_sclk * 10;
9370 		}
9371 
9372 		wm_high.disp_clk = mode->clock;
9373 		wm_high.src_width = mode->crtc_hdisplay;
9374 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9375 		wm_high.blank_time = line_time - wm_high.active_time;
9376 		wm_high.interlaced = false;
9377 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9378 			wm_high.interlaced = true;
9379 		wm_high.vsc = radeon_crtc->vsc;
9380 		wm_high.vtaps = 1;
9381 		if (radeon_crtc->rmx_type != RMX_OFF)
9382 			wm_high.vtaps = 2;
9383 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9384 		wm_high.lb_size = lb_size;
9385 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9386 		wm_high.num_heads = num_heads;
9387 
9388 		/* set for high clocks */
9389 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9390 
9391 		/* possibly force display priority to high */
9392 		/* should really do this at mode validation time... */
9393 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9394 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9395 		    !dce8_check_latency_hiding(&wm_high) ||
9396 		    (rdev->disp_priority == 2)) {
9397 			DRM_DEBUG_KMS("force priority to high\n");
9398 		}
9399 
9400 		/* watermark for low clocks */
9401 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9402 		    rdev->pm.dpm_enabled) {
9403 			wm_low.yclk =
9404 				radeon_dpm_get_mclk(rdev, true) * 10;
9405 			wm_low.sclk =
9406 				radeon_dpm_get_sclk(rdev, true) * 10;
9407 		} else {
9408 			wm_low.yclk = rdev->pm.current_mclk * 10;
9409 			wm_low.sclk = rdev->pm.current_sclk * 10;
9410 		}
9411 
9412 		wm_low.disp_clk = mode->clock;
9413 		wm_low.src_width = mode->crtc_hdisplay;
9414 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9415 		wm_low.blank_time = line_time - wm_low.active_time;
9416 		wm_low.interlaced = false;
9417 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9418 			wm_low.interlaced = true;
9419 		wm_low.vsc = radeon_crtc->vsc;
9420 		wm_low.vtaps = 1;
9421 		if (radeon_crtc->rmx_type != RMX_OFF)
9422 			wm_low.vtaps = 2;
9423 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9424 		wm_low.lb_size = lb_size;
9425 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9426 		wm_low.num_heads = num_heads;
9427 
9428 		/* set for low clocks */
9429 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9430 
9431 		/* possibly force display priority to high */
9432 		/* should really do this at mode validation time... */
9433 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9434 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9435 		    !dce8_check_latency_hiding(&wm_low) ||
9436 		    (rdev->disp_priority == 2)) {
9437 			DRM_DEBUG_KMS("force priority to high\n");
9438 		}
9439 	}
9440 
9441 	/* select wm A */
9442 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9443 	tmp = wm_mask;
9444 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9445 	tmp |= LATENCY_WATERMARK_MASK(1);
9446 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9447 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9448 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9449 		LATENCY_HIGH_WATERMARK(line_time)));
9450 	/* select wm B */
9451 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9452 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9453 	tmp |= LATENCY_WATERMARK_MASK(2);
9454 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9455 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9456 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9457 		LATENCY_HIGH_WATERMARK(line_time)));
9458 	/* restore original selection */
9459 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9460 
9461 	/* save values for DPM */
9462 	radeon_crtc->line_time = line_time;
9463 	radeon_crtc->wm_high = latency_watermark_a;
9464 	radeon_crtc->wm_low = latency_watermark_b;
9465 }
9466 
9467 /**
9468  * dce8_bandwidth_update - program display watermarks
9469  *
9470  * @rdev: radeon_device pointer
9471  *
9472  * Calculate and program the display watermarks and line
9473  * buffer allocation (CIK).
9474  */
9475 void dce8_bandwidth_update(struct radeon_device *rdev)
9476 {
9477 	struct drm_display_mode *mode = NULL;
9478 	u32 num_heads = 0, lb_size;
9479 	int i;
9480 
9481 	if (!rdev->mode_info.mode_config_initialized)
9482 		return;
9483 
9484 	radeon_update_display_priority(rdev);
9485 
9486 	for (i = 0; i < rdev->num_crtc; i++) {
9487 		if (rdev->mode_info.crtcs[i]->base.enabled)
9488 			num_heads++;
9489 	}
9490 	for (i = 0; i < rdev->num_crtc; i++) {
9491 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9492 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9493 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9494 	}
9495 }
9496 
9497 /**
9498  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9499  *
9500  * @rdev: radeon_device pointer
9501  *
9502  * Fetches a GPU clock counter snapshot (CIK).
9503  * Returns the 64 bit clock counter snapshot.
9504  */
9505 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9506 {
9507 	uint64_t clock;
9508 
9509 	mutex_lock(&rdev->gpu_clock_mutex);
9510 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9511 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9512 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9513 	mutex_unlock(&rdev->gpu_clock_mutex);
9514 	return clock;
9515 }
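
/* Editor's usage sketch for cik_get_gpu_clock_counter(): timing an
 * interval in GPU clocks by sampling the counter twice.  Illustrative
 * only and compiled out; assumes the 64-bit counter does not wrap
 * during the interval.
 */
#if 0
static uint64_t cik_gpu_clocks_elapsed(struct radeon_device *rdev)
{
	uint64_t start = cik_get_gpu_clock_counter(rdev);
	/* ... work being timed goes here ... */
	return cik_get_gpu_clock_counter(rdev) - start;
}
#endif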
9516 
9517 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9518                               u32 cntl_reg, u32 status_reg)
9519 {
9520 	int r, i;
9521 	struct atom_clock_dividers dividers;
9522 	uint32_t tmp;
9523 
9524 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9525 					   clock, false, &dividers);
9526 	if (r)
9527 		return r;
9528 
9529 	tmp = RREG32_SMC(cntl_reg);
9530 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9531 	tmp |= dividers.post_divider;
9532 	WREG32_SMC(cntl_reg, tmp);
9533 
9534 	for (i = 0; i < 100; i++) {
9535 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9536 			break;
9537 		mdelay(10);
9538 	}
9539 	if (i == 100)
9540 		return -ETIMEDOUT;
9541 
9542 	return 0;
9543 }
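
/* Editor's note: the poll above retries 100 times with mdelay(10), so the
 * SMC has up to ~1 s to assert DCLK_STATUS before -ETIMEDOUT is returned.
 */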
9544 
9545 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9546 {
9547 	int r = 0;
9548 
9549 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9550 	if (r)
9551 		return r;
9552 
9553 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9554 	return r;
9555 }
9556 
9557 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9558 {
9559 	int r, i;
9560 	struct atom_clock_dividers dividers;
9561 	u32 tmp;
9562 
9563 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9564 					   ecclk, false, &dividers);
9565 	if (r)
9566 		return r;
9567 
9568 	for (i = 0; i < 100; i++) {
9569 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9570 			break;
9571 		mdelay(10);
9572 	}
9573 	if (i == 100)
9574 		return -ETIMEDOUT;
9575 
9576 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9577 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9578 	tmp |= dividers.post_divider;
9579 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9580 
9581 	for (i = 0; i < 100; i++) {
9582 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9583 			break;
9584 		mdelay(10);
9585 	}
9586 	if (i == 100)
9587 		return -ETIMEDOUT;
9588 
9589 	return 0;
9590 }
9591 
9592 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9593 {
9594 	struct pci_dev *root = rdev->pdev->bus->self;
9595 	int bridge_pos, gpu_pos;
9596 	u32 speed_cntl, mask, current_data_rate;
9597 	int ret, i;
9598 	u16 tmp16;
9599 
9600 	if (pci_is_root_bus(rdev->pdev->bus))
9601 		return;
9602 
9603 	if (radeon_pcie_gen2 == 0)
9604 		return;
9605 
9606 	if (rdev->flags & RADEON_IS_IGP)
9607 		return;
9608 
9609 	if (!(rdev->flags & RADEON_IS_PCIE))
9610 		return;
9611 
9612 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9613 	if (ret != 0)
9614 		return;
9615 
9616 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9617 		return;
9618 
9619 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9620 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9621 		LC_CURRENT_DATA_RATE_SHIFT;
9622 	if (mask & DRM_PCIE_SPEED_80) {
9623 		if (current_data_rate == 2) {
9624 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9625 			return;
9626 		}
9627 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9628 	} else if (mask & DRM_PCIE_SPEED_50) {
9629 		if (current_data_rate == 1) {
9630 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9631 			return;
9632 		}
9633 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9634 	}
9635 
9636 	bridge_pos = pci_pcie_cap(root);
9637 	if (!bridge_pos)
9638 		return;
9639 
9640 	gpu_pos = pci_pcie_cap(rdev->pdev);
9641 	if (!gpu_pos)
9642 		return;
9643 
9644 	if (mask & DRM_PCIE_SPEED_80) {
9645 		/* re-try equalization if gen3 is not already enabled */
9646 		if (current_data_rate != 2) {
9647 			u16 bridge_cfg, gpu_cfg;
9648 			u16 bridge_cfg2, gpu_cfg2;
9649 			u32 max_lw, current_lw, tmp;
9650 
9651 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9652 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9653 
9654 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9655 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9656 
9657 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9658 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9659 
9660 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9661 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9662 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9663 
9664 			if (current_lw < max_lw) {
9665 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9666 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9667 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9668 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9669 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9670 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9671 				}
9672 			}
9673 
9674 			for (i = 0; i < 10; i++) {
9675 				/* check status */
9676 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9677 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9678 					break;
9679 
9680 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9681 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9682 
9683 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9684 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9685 
9686 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9687 				tmp |= LC_SET_QUIESCE;
9688 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9689 
9690 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9691 				tmp |= LC_REDO_EQ;
9692 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9693 
9694 				mdelay(100);
9695 
9696 				/* linkctl */
9697 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9698 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9699 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9700 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9701 
9702 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9703 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9704 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9705 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9706 
9707 				/* linkctl2 */
9708 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9709 				tmp16 &= ~((1 << 4) | (7 << 9));
9710 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9711 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9712 
9713 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9714 				tmp16 &= ~((1 << 4) | (7 << 9));
9715 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9716 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9717 
9718 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9719 				tmp &= ~LC_SET_QUIESCE;
9720 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9721 			}
9722 		}
9723 	}
9724 
9725 	/* set the link speed */
9726 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9727 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9728 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9729 
9730 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9731 	tmp16 &= ~0xf;
9732 	if (mask & DRM_PCIE_SPEED_80)
9733 		tmp16 |= 3; /* gen3 */
9734 	else if (mask & DRM_PCIE_SPEED_50)
9735 		tmp16 |= 2; /* gen2 */
9736 	else
9737 		tmp16 |= 1; /* gen1 */
9738 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
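	/* Editor's note: the low nibble of PCI_EXP_LNKCTL2 is the PCIe
	 * Target Link Speed field; values 1, 2 and 3 select 2.5, 5.0 and
	 * 8.0 GT/s respectively, matching the gen1/2/3 comments above.
	 */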
9739 
9740 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9741 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9742 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9743 
9744 	for (i = 0; i < rdev->usec_timeout; i++) {
9745 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9746 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9747 			break;
9748 		udelay(1);
9749 	}
9750 }
9751 
9752 static void cik_program_aspm(struct radeon_device *rdev)
9753 {
9754 	u32 data, orig;
9755 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9756 	bool disable_clkreq = false;
9757 
9758 	if (radeon_aspm == 0)
9759 		return;
9760 
9761 	/* XXX double check IGPs */
9762 	if (rdev->flags & RADEON_IS_IGP)
9763 		return;
9764 
9765 	if (!(rdev->flags & RADEON_IS_PCIE))
9766 		return;
9767 
9768 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9769 	data &= ~LC_XMIT_N_FTS_MASK;
9770 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9771 	if (orig != data)
9772 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9773 
9774 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9775 	data |= LC_GO_TO_RECOVERY;
9776 	if (orig != data)
9777 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9778 
9779 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9780 	data |= P_IGNORE_EDB_ERR;
9781 	if (orig != data)
9782 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9783 
9784 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9785 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9786 	data |= LC_PMI_TO_L1_DIS;
9787 	if (!disable_l0s)
9788 		data |= LC_L0S_INACTIVITY(7);
9789 
9790 	if (!disable_l1) {
9791 		data |= LC_L1_INACTIVITY(7);
9792 		data &= ~LC_PMI_TO_L1_DIS;
9793 		if (orig != data)
9794 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9795 
9796 		if (!disable_plloff_in_l1) {
9797 			bool clk_req_support;
9798 
9799 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9800 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9801 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9802 			if (orig != data)
9803 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9804 
9805 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9806 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9807 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9808 			if (orig != data)
9809 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9810 
9811 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9812 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9813 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9814 			if (orig != data)
9815 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9816 
9817 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9818 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9819 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9820 			if (orig != data)
9821 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9822 
9823 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9824 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9825 			data |= LC_DYN_LANES_PWR_STATE(3);
9826 			if (orig != data)
9827 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9828 
9829 			if (!disable_clkreq &&
9830 			    !pci_is_root_bus(rdev->pdev->bus)) {
9831 				struct pci_dev *root = rdev->pdev->bus->self;
9832 				u32 lnkcap;
9833 
9834 				clk_req_support = false;
9835 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9836 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9837 					clk_req_support = true;
9838 			} else {
9839 				clk_req_support = false;
9840 			}
9841 
9842 			if (clk_req_support) {
9843 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9844 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9845 				if (orig != data)
9846 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9847 
9848 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9849 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9850 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9851 				if (orig != data)
9852 					WREG32_SMC(THM_CLK_CNTL, data);
9853 
9854 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9855 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9856 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9857 				if (orig != data)
9858 					WREG32_SMC(MISC_CLK_CTRL, data);
9859 
9860 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9861 				data &= ~BCLK_AS_XCLK;
9862 				if (orig != data)
9863 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9864 
9865 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9866 				data &= ~FORCE_BIF_REFCLK_EN;
9867 				if (orig != data)
9868 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9869 
9870 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9871 				data &= ~MPLL_CLKOUT_SEL_MASK;
9872 				data |= MPLL_CLKOUT_SEL(4);
9873 				if (orig != data)
9874 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9875 			}
9876 		}
9877 	} else {
9878 		if (orig != data)
9879 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9880 	}
9881 
9882 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9883 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9884 	if (orig != data)
9885 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9886 
9887 	if (!disable_l0s) {
9888 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9889 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9890 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9891 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9892 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9893 				data &= ~LC_L0S_INACTIVITY_MASK;
9894 				if (orig != data)
9895 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9896 			}
9897 		}
9898 	}
9899 }
9900