/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>

#include <drm/drm_vblank.h>
#include <drm/radeon_drm.h>

#include "atom.h"
#include "clearstate_si.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "radeon_ucode.h"
#include "si_blit_shaders.h"
#include "sid.h"

MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");
MODULE_FIRMWARE("radeon/verde_k_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");
MODULE_FIRMWARE("radeon/oland_k_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");

MODULE_FIRMWARE("radeon/si58_mc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

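/* per-CRTC register block base offsets, indexed by CRTC number */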
static const u32 crtc_offsets[] =
{
	EVERGREEN_CRTC0_REGISTER_OFFSET,
	EVERGREEN_CRTC1_REGISTER_OFFSET,
	EVERGREEN_CRTC2_REGISTER_OFFSET,
	EVERGREEN_CRTC3_REGISTER_OFFSET,
	EVERGREEN_CRTC4_REGISTER_OFFSET,
	EVERGREEN_CRTC5_REGISTER_OFFSET
};

static const u32 si_disp_int_status[] =
{
	DISP_INTERRUPT_STATUS,
	DISP_INTERRUPT_STATUS_CONTINUE,
	DISP_INTERRUPT_STATUS_CONTINUE2,
	DISP_INTERRUPT_STATUS_CONTINUE3,
	DISP_INTERRUPT_STATUS_CONTINUE4,
	DISP_INTERRUPT_STATUS_CONTINUE5
};

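/* the six HPD pads use identically laid out registers spaced 0xc bytes apart */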
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + (x * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + (x * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + (x * 0xc))

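/*
 * RLC save/restore list: pairs of a packed word (what appears to be a
 * GRBM-style instance selector in the upper 16 bits and a dword register
 * offset in the lower 16) followed by a data word; the exact encoding is
 * interpreted by the RLC ucode.
 */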
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

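/*
 * "Golden" register tables are {offset, and_mask, or_value} triplets
 * applied at init via radeon_program_register_sequence(), roughly
 * tmp = (RREG32(reg) & ~and_mask) | or_value, with an and_mask of
 * 0xffffffff writing or_value directly.
 */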
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

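/* power-gating init sequence for VERDE, in the same triplet format */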
static const u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

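/**
 * si_init_golden_registers - program chip-specific "golden" registers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the golden register sequences for the detected SI variant.
 */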
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

/**
 * si_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register.
 */
int si_get_allowed_info_register(struct radeon_device *rdev,
				 u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
	case UVD_STATUS:
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

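/* PCIe bus clock, presumably in the driver's usual 10 kHz units (100 MHz);
 * TCLK is a tenth of it.
 */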
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

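	/* bit 9 flags an out-of-range reading; clamp to the 255 C maximum */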
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp *= 1000;

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

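/* MC IO debug {index, data} register pairs programmed before the MC ucode load */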
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};

/* ucode loading */
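/**
 * si_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Loads the memory controller ucode into the hardware (SI). New-style
 * firmware images carry their own little-endian IO register list; legacy
 * images use the big-endian per-chip tables above.
 * Returns 0 on success, -EINVAL if no MC firmware is available.
 */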
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

static int si_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	size_t smc_req_size, mc2_req_size;
	char fw_name[30];
	int err;
	int new_fw = 0;
	bool new_smc = false;
	bool si58_fw = false;
	bool banks2_fw = false;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		new_chip_name = "tahiti";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		if ((rdev->pdev->revision == 0x81) &&
		    ((rdev->pdev->device == 0x6810) ||
		     (rdev->pdev->device == 0x6811)))
			new_smc = true;
		new_chip_name = "pitcairn";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		if (((rdev->pdev->device == 0x6820) &&
		     ((rdev->pdev->revision == 0x81) ||
		      (rdev->pdev->revision == 0x83))) ||
		    ((rdev->pdev->device == 0x6821) &&
		     ((rdev->pdev->revision == 0x83) ||
		      (rdev->pdev->revision == 0x87))) ||
		    ((rdev->pdev->revision == 0x87) &&
		     ((rdev->pdev->device == 0x6823) ||
		      (rdev->pdev->device == 0x682b))))
			new_smc = true;
		new_chip_name = "verde";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_OLAND:
		chip_name = "OLAND";
		if (((rdev->pdev->revision == 0x81) &&
		     ((rdev->pdev->device == 0x6600) ||
		      (rdev->pdev->device == 0x6604) ||
		      (rdev->pdev->device == 0x6605) ||
		      (rdev->pdev->device == 0x6610))) ||
		    ((rdev->pdev->revision == 0x83) &&
		     (rdev->pdev->device == 0x6610)))
			new_smc = true;
		new_chip_name = "oland";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_HAINAN:
		chip_name = "HAINAN";
		if (((rdev->pdev->revision == 0x81) &&
		     (rdev->pdev->device == 0x6660)) ||
		    ((rdev->pdev->revision == 0x83) &&
		     ((rdev->pdev->device == 0x6660) ||
		      (rdev->pdev->device == 0x6663) ||
		      (rdev->pdev->device == 0x6665) ||
		      (rdev->pdev->device == 0x6667))))
			new_smc = true;
		else if ((rdev->pdev->revision == 0xc3) &&
			 (rdev->pdev->device == 0x6665))
			banks2_fw = true;
		new_chip_name = "hainan";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
1771 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1772 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1773 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1774 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1775 		break;
1776 	default: BUG();
1777 	}
1778 
1779 	/* this memory configuration requires special firmware */
1780 	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1781 		si58_fw = true;
1782 
1783 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1784 
1785 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1786 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1787 	if (err) {
1788 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1789 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1790 		if (err)
1791 			goto out;
1792 		if (rdev->pfp_fw->size != pfp_req_size) {
1793 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1794 			       rdev->pfp_fw->size, fw_name);
1795 			err = -EINVAL;
1796 			goto out;
1797 		}
1798 	} else {
1799 		err = radeon_ucode_validate(rdev->pfp_fw);
1800 		if (err) {
1801 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1802 			       fw_name);
1803 			goto out;
1804 		} else {
1805 			new_fw++;
1806 		}
1807 	}
1808 
1809 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1810 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1811 	if (err) {
1812 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1813 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1814 		if (err)
1815 			goto out;
1816 		if (rdev->me_fw->size != me_req_size) {
1817 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1818 			       rdev->me_fw->size, fw_name);
1819 			err = -EINVAL;
1820 		}
1821 	} else {
1822 		err = radeon_ucode_validate(rdev->me_fw);
1823 		if (err) {
1824 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1825 			       fw_name);
1826 			goto out;
1827 		} else {
1828 			new_fw++;
1829 		}
1830 	}
1831 
1832 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1833 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1834 	if (err) {
1835 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1836 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1837 		if (err)
1838 			goto out;
1839 		if (rdev->ce_fw->size != ce_req_size) {
1840 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1841 			       rdev->ce_fw->size, fw_name);
1842 			err = -EINVAL;
1843 		}
1844 	} else {
1845 		err = radeon_ucode_validate(rdev->ce_fw);
1846 		if (err) {
1847 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1848 			       fw_name);
1849 			goto out;
1850 		} else {
1851 			new_fw++;
1852 		}
1853 	}
1854 
1855 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1856 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1857 	if (err) {
1858 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1859 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1860 		if (err)
1861 			goto out;
1862 		if (rdev->rlc_fw->size != rlc_req_size) {
1863 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1864 			       rdev->rlc_fw->size, fw_name);
1865 			err = -EINVAL;
1866 		}
1867 	} else {
1868 		err = radeon_ucode_validate(rdev->rlc_fw);
1869 		if (err) {
1870 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1871 			       fw_name);
1872 			goto out;
1873 		} else {
1874 			new_fw++;
1875 		}
1876 	}
1877 
1878 	if (si58_fw)
1879 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1880 	else
1881 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1882 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1883 	if (err) {
1884 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1885 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1886 		if (err) {
1887 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1888 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1889 			if (err)
1890 				goto out;
1891 		}
1892 		if ((rdev->mc_fw->size != mc_req_size) &&
1893 		    (rdev->mc_fw->size != mc2_req_size)) {
1894 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1895 			       rdev->mc_fw->size, fw_name);
1896 			err = -EINVAL;
1897 		}
1898 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1899 	} else {
1900 		err = radeon_ucode_validate(rdev->mc_fw);
1901 		if (err) {
1902 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1903 			       fw_name);
1904 			goto out;
1905 		} else {
1906 			new_fw++;
1907 		}
1908 	}
1909 
1910 	if (banks2_fw)
1911 		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1912 	else if (new_smc)
1913 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1914 	else
1915 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1916 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1917 	if (err) {
1918 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1919 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1920 		if (err) {
1921 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1922 			release_firmware(rdev->smc_fw);
1923 			rdev->smc_fw = NULL;
1924 			err = 0;
1925 		} else if (rdev->smc_fw->size != smc_req_size) {
1926 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1927 			       rdev->smc_fw->size, fw_name);
1928 			err = -EINVAL;
1929 		}
1930 	} else {
1931 		err = radeon_ucode_validate(rdev->smc_fw);
1932 		if (err) {
1933 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1934 			       fw_name);
1935 			goto out;
1936 		} else {
1937 			new_fw++;
1938 		}
1939 	}
1940 
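	/*
	 * new_fw counts how many of the six blocks (pfp, me, ce, rlc, mc,
	 * smc) came in the new packaged format; any value between 0 and 6
	 * means the files on disk mix formats, which is rejected below.
	 */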
1941 	if (new_fw == 0) {
1942 		rdev->new_fw = false;
1943 	} else if (new_fw < 6) {
1944 		pr_err("si_fw: mixing new and old firmware!\n");
1945 		err = -EINVAL;
1946 	} else {
1947 		rdev->new_fw = true;
1948 	}
1949 out:
1950 	if (err) {
1951 		if (err != -EINVAL)
1952 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1953 			       fw_name);
1954 		release_firmware(rdev->pfp_fw);
1955 		rdev->pfp_fw = NULL;
1956 		release_firmware(rdev->me_fw);
1957 		rdev->me_fw = NULL;
1958 		release_firmware(rdev->ce_fw);
1959 		rdev->ce_fw = NULL;
1960 		release_firmware(rdev->rlc_fw);
1961 		rdev->rlc_fw = NULL;
1962 		release_firmware(rdev->mc_fw);
1963 		rdev->mc_fw = NULL;
1964 		release_firmware(rdev->smc_fw);
1965 		rdev->smc_fw = NULL;
1966 	}
1967 	return err;
1968 }
1969 
1970 /* watermark setup */
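/*
 * dce6_bandwidth_update() at the end of this section drives everything:
 * it walks the crtcs in linked pairs, sizes each crtc's share of the
 * line buffer via dce6_line_buffer_adjust(), and feeds the dce6_*
 * bandwidth helpers below into dce6_program_watermarks().
 */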
1971 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1972 				   struct radeon_crtc *radeon_crtc,
1973 				   struct drm_display_mode *mode,
1974 				   struct drm_display_mode *other_mode)
1975 {
1976 	u32 tmp, buffer_alloc, i;
1977 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1978 	/*
1979 	 * Line Buffer Setup
1980 	 * There are 3 line buffers, each one shared by 2 display controllers.
1981 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1982 	 * the display controllers.  The partitioning is done via one of four
1983 	 * preset allocations specified in bits 21:20 (only two are used here):
1984 	 *  0 - half lb
1985 	 *  2 - whole lb, other crtc must be disabled
1986 	 */
1987 	/* this can get tricky if we have two large displays on a paired group
1988 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1989 	 * non-linked crtcs for maximum line buffer allocation.
1990 	 */
1991 	if (radeon_crtc->base.enabled && mode) {
1992 		if (other_mode) {
1993 			tmp = 0; /* 1/2 */
1994 			buffer_alloc = 1;
1995 		} else {
1996 			tmp = 2; /* whole */
1997 			buffer_alloc = 2;
1998 		}
1999 	} else {
2000 		tmp = 0;
2001 		buffer_alloc = 0;
2002 	}
2003 
2004 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2005 	       DC_LB_MEMORY_CONFIG(tmp));
2006 
2007 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2008 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2009 	for (i = 0; i < rdev->usec_timeout; i++) {
2010 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2011 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
2012 			break;
2013 		udelay(1);
2014 	}
2015 
2016 	if (radeon_crtc->base.enabled && mode) {
2017 		switch (tmp) {
2018 		case 0:
2019 		default:
2020 			return 4096 * 2;
2021 		case 2:
2022 			return 8192 * 2;
2023 		}
2024 	}
2025 
2026 	/* controller not enabled, so no lb used */
2027 	return 0;
2028 }
2029 
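/*
 * Decode MC_SHARED_CHMAP.NOOFCHAN into a DRAM channel count; note the
 * encoding is not linear (e.g. a field value of 4 means 3 channels).
 */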
2030 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2031 {
2032 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2033 
2034 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2035 	case 0:
2036 	default:
2037 		return 1;
2038 	case 1:
2039 		return 2;
2040 	case 2:
2041 		return 4;
2042 	case 3:
2043 		return 8;
2044 	case 4:
2045 		return 3;
2046 	case 5:
2047 		return 6;
2048 	case 6:
2049 		return 10;
2050 	case 7:
2051 		return 12;
2052 	case 8:
2053 		return 16;
2054 	}
2055 }
2056 
2057 struct dce6_wm_params {
2058 	u32 dram_channels; /* number of dram channels */
2059 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2060 	u32 sclk;          /* engine clock in kHz */
2061 	u32 disp_clk;      /* display clock in kHz */
2062 	u32 src_width;     /* viewport width */
2063 	u32 active_time;   /* active display time in ns */
2064 	u32 blank_time;    /* blank time in ns */
2065 	bool interlaced;    /* mode is interlaced */
2066 	fixed20_12 vsc;    /* vertical scale ratio */
2067 	u32 num_heads;     /* number of active crtcs */
2068 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2069 	u32 lb_size;       /* line buffer allocated to pipe */
2070 	u32 vtaps;         /* vertical scaler taps */
2071 };
2072 
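/*
 * Raw DRAM bandwidth in MB/s, assuming 4 bytes per channel per memory
 * clock and 70% efficiency, as coded below:
 *   bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7
 * e.g. yclk = 1000000 kHz with 4 channels gives 1000 * 16 * 0.7
 * = 11200 MB/s.
 */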
2073 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2074 {
2075 	/* Calculate raw DRAM Bandwidth */
2076 	fixed20_12 dram_efficiency; /* 0.7 */
2077 	fixed20_12 yclk, dram_channels, bandwidth;
2078 	fixed20_12 a;
2079 
2080 	a.full = dfixed_const(1000);
2081 	yclk.full = dfixed_const(wm->yclk);
2082 	yclk.full = dfixed_div(yclk, a);
2083 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2084 	a.full = dfixed_const(10);
2085 	dram_efficiency.full = dfixed_const(7);
2086 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2087 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2088 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2089 
2090 	return dfixed_trunc(bandwidth);
2091 }
2092 
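/*
 * Same calculation as dce6_dram_bandwidth(), but scaled by the share of
 * DRAM bandwidth granted to the display: the worst-case 0.3 allocation
 * noted below replaces the 0.7 efficiency factor.
 */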
2093 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2094 {
2095 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2096 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2097 	fixed20_12 yclk, dram_channels, bandwidth;
2098 	fixed20_12 a;
2099 
2100 	a.full = dfixed_const(1000);
2101 	yclk.full = dfixed_const(wm->yclk);
2102 	yclk.full = dfixed_div(yclk, a);
2103 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2104 	a.full = dfixed_const(10);
2105 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2106 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2107 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2108 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2109 
2110 	return dfixed_trunc(bandwidth);
2111 }
2112 
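/*
 * Display data return bandwidth in MB/s: 32 bytes per engine clock at
 * 80% efficiency, i.e. (sclk / 1000) * 32 * 0.8.
 */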
2113 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2114 {
2115 	/* Calculate the display Data return Bandwidth */
2116 	fixed20_12 return_efficiency; /* 0.8 */
2117 	fixed20_12 sclk, bandwidth;
2118 	fixed20_12 a;
2119 
2120 	a.full = dfixed_const(1000);
2121 	sclk.full = dfixed_const(wm->sclk);
2122 	sclk.full = dfixed_div(sclk, a);
2123 	a.full = dfixed_const(10);
2124 	return_efficiency.full = dfixed_const(8);
2125 	return_efficiency.full = dfixed_div(return_efficiency, a);
2126 	a.full = dfixed_const(32);
2127 	bandwidth.full = dfixed_mul(a, sclk);
2128 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2129 
2130 	return dfixed_trunc(bandwidth);
2131 }
2132 
2133 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2134 {
2135 	return 32;
2136 }
2137 
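/*
 * DMIF request bandwidth in MB/s: requests are limited both by the
 * display clock (half a request, 16 bytes, per clock) and the engine
 * clock (a full 32-byte request per clock), so take the smaller of the
 * two and apply the 80% request efficiency:
 *   min(16 * disp_clk, 32 * sclk) / 1000 * 0.8
 */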
2138 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2139 {
2140 	/* Calculate the DMIF Request Bandwidth */
2141 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2142 	fixed20_12 disp_clk, sclk, bandwidth;
2143 	fixed20_12 a, b1, b2;
2144 	u32 min_bandwidth;
2145 
2146 	a.full = dfixed_const(1000);
2147 	disp_clk.full = dfixed_const(wm->disp_clk);
2148 	disp_clk.full = dfixed_div(disp_clk, a);
2149 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2150 	b1.full = dfixed_mul(a, disp_clk);
2151 
2152 	a.full = dfixed_const(1000);
2153 	sclk.full = dfixed_const(wm->sclk);
2154 	sclk.full = dfixed_div(sclk, a);
2155 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2156 	b2.full = dfixed_mul(a, sclk);
2157 
2158 	a.full = dfixed_const(10);
2159 	disp_clk_request_efficiency.full = dfixed_const(8);
2160 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2161 
2162 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2163 
2164 	a.full = dfixed_const(min_bandwidth);
2165 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2166 
2167 	return dfixed_trunc(bandwidth);
2168 }
2169 
2170 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2171 {
2172 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2173 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2174 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2175 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2176 
2177 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2178 }
2179 
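/*
 * Average bandwidth a mode needs, in MB/s: bytes per source line
 * (src_width * bytes_per_pixel * vsc) divided by the line time in us
 * ((active_time + blank_time) / 1000; the times are in ns).
 */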
2180 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2181 {
2182 	/* Calculate the display mode Average Bandwidth
2183 	 * DisplayMode should contain the source and destination dimensions,
2184 	 * timing, etc.
2185 	 */
2186 	fixed20_12 bpp;
2187 	fixed20_12 line_time;
2188 	fixed20_12 src_width;
2189 	fixed20_12 bandwidth;
2190 	fixed20_12 a;
2191 
2192 	a.full = dfixed_const(1000);
2193 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2194 	line_time.full = dfixed_div(line_time, a);
2195 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2196 	src_width.full = dfixed_const(wm->src_width);
2197 	bandwidth.full = dfixed_mul(src_width, bpp);
2198 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2199 	bandwidth.full = dfixed_div(bandwidth, line_time);
2200 
2201 	return dfixed_trunc(bandwidth);
2202 }
2203 
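/*
 * Worst-case latency the line buffer must hide, in ns.  Roughly: the
 * memory controller latency, plus the time for outstanding chunk
 * (512 * 8 byte) and cursor (128 * 4 byte) requests from the other
 * heads to drain at the available bandwidth, plus the display pipe
 * latency.  If the line buffer cannot be refilled within the active
 * display time, the shortfall is added on top.
 */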
2204 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2205 {
2206 	/* First calculate the latency in ns */
2207 	u32 mc_latency = 2000; /* 2000 ns. */
2208 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2209 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2210 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2211 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2212 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2213 		(wm->num_heads * cursor_line_pair_return_time);
2214 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2215 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2216 	u32 tmp, dmif_size = 12288;
2217 	fixed20_12 a, b, c;
2218 
2219 	if (wm->num_heads == 0)
2220 		return 0;
2221 
2222 	a.full = dfixed_const(2);
2223 	b.full = dfixed_const(1);
2224 	if ((wm->vsc.full > a.full) ||
2225 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2226 	    (wm->vtaps >= 5) ||
2227 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2228 		max_src_lines_per_dst_line = 4;
2229 	else
2230 		max_src_lines_per_dst_line = 2;
2231 
2232 	a.full = dfixed_const(available_bandwidth);
2233 	b.full = dfixed_const(wm->num_heads);
2234 	a.full = dfixed_div(a, b);
2235 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2236 	tmp = min(dfixed_trunc(a), tmp);
2237 
2238 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2239 
2240 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2241 	b.full = dfixed_const(1000);
2242 	c.full = dfixed_const(lb_fill_bw);
2243 	b.full = dfixed_div(c, b);
2244 	a.full = dfixed_div(a, b);
2245 	line_fill_time = dfixed_trunc(a);
2246 
2247 	if (line_fill_time < wm->active_time)
2248 		return latency;
2249 	else
2250 		return latency + (line_fill_time - wm->active_time);
2251 
2252 }
2253 
2254 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2255 {
2256 	if (dce6_average_bandwidth(wm) <=
2257 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2258 		return true;
2259 	else
2260 		return false;
2261 }
2262 
2263 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2264 {
2265 	if (dce6_average_bandwidth(wm) <=
2266 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2267 		return true;
2268 	else
2269 		return false;
2270 }
2271 
2272 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2273 {
2274 	u32 lb_partitions = wm->lb_size / wm->src_width;
2275 	u32 line_time = wm->active_time + wm->blank_time;
2276 	u32 latency_tolerant_lines;
2277 	u32 latency_hiding;
2278 	fixed20_12 a;
2279 
2280 	a.full = dfixed_const(1);
2281 	if (wm->vsc.full > a.full)
2282 		latency_tolerant_lines = 1;
2283 	else {
2284 		if (lb_partitions <= (wm->vtaps + 1))
2285 			latency_tolerant_lines = 1;
2286 		else
2287 			latency_tolerant_lines = 2;
2288 	}
2289 
2290 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2291 
2292 	if (dce6_latency_watermark(wm) <= latency_hiding)
2293 		return true;
2294 	else
2295 		return false;
2296 }
2297 
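/*
 * Program two watermark sets per pipe: set A for the high clocks and
 * set B for the low clocks, each selected and written through
 * DPG_PIPE_ARBITRATION_CONTROL3 / DPG_PIPE_LATENCY_CONTROL, plus the
 * PRIORITY_A/B marks derived from them.  If the mode's average
 * bandwidth cannot be sustained or the latency cannot be hidden,
 * display priority is forced high instead.
 */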
2298 static void dce6_program_watermarks(struct radeon_device *rdev,
2299 					 struct radeon_crtc *radeon_crtc,
2300 					 u32 lb_size, u32 num_heads)
2301 {
2302 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2303 	struct dce6_wm_params wm_low, wm_high;
2304 	u32 dram_channels;
2305 	u32 active_time;
2306 	u32 line_time = 0;
2307 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2308 	u32 priority_a_mark = 0, priority_b_mark = 0;
2309 	u32 priority_a_cnt = PRIORITY_OFF;
2310 	u32 priority_b_cnt = PRIORITY_OFF;
2311 	u32 tmp, arb_control3;
2312 	fixed20_12 a, b, c;
2313 
2314 	if (radeon_crtc->base.enabled && num_heads && mode) {
2315 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2316 					    (u32)mode->clock);
2317 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2318 					  (u32)mode->clock);
2319 		line_time = min(line_time, (u32)65535);
2320 		priority_a_cnt = 0;
2321 		priority_b_cnt = 0;
2322 
2323 		if (rdev->family == CHIP_ARUBA)
2324 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2325 		else
2326 			dram_channels = si_get_number_of_dram_channels(rdev);
2327 
2328 		/* watermark for high clocks */
2329 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2330 			wm_high.yclk =
2331 				radeon_dpm_get_mclk(rdev, false) * 10;
2332 			wm_high.sclk =
2333 				radeon_dpm_get_sclk(rdev, false) * 10;
2334 		} else {
2335 			wm_high.yclk = rdev->pm.current_mclk * 10;
2336 			wm_high.sclk = rdev->pm.current_sclk * 10;
2337 		}
2338 
2339 		wm_high.disp_clk = mode->clock;
2340 		wm_high.src_width = mode->crtc_hdisplay;
2341 		wm_high.active_time = active_time;
2342 		wm_high.blank_time = line_time - wm_high.active_time;
2343 		wm_high.interlaced = false;
2344 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2345 			wm_high.interlaced = true;
2346 		wm_high.vsc = radeon_crtc->vsc;
2347 		wm_high.vtaps = 1;
2348 		if (radeon_crtc->rmx_type != RMX_OFF)
2349 			wm_high.vtaps = 2;
2350 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2351 		wm_high.lb_size = lb_size;
2352 		wm_high.dram_channels = dram_channels;
2353 		wm_high.num_heads = num_heads;
2354 
2355 		/* watermark for low clocks */
2356 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2357 			wm_low.yclk =
2358 				radeon_dpm_get_mclk(rdev, true) * 10;
2359 			wm_low.sclk =
2360 				radeon_dpm_get_sclk(rdev, true) * 10;
2361 		} else {
2362 			wm_low.yclk = rdev->pm.current_mclk * 10;
2363 			wm_low.sclk = rdev->pm.current_sclk * 10;
2364 		}
2365 
2366 		wm_low.disp_clk = mode->clock;
2367 		wm_low.src_width = mode->crtc_hdisplay;
2368 		wm_low.active_time = active_time;
2369 		wm_low.blank_time = line_time - wm_low.active_time;
2370 		wm_low.interlaced = false;
2371 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2372 			wm_low.interlaced = true;
2373 		wm_low.vsc = radeon_crtc->vsc;
2374 		wm_low.vtaps = 1;
2375 		if (radeon_crtc->rmx_type != RMX_OFF)
2376 			wm_low.vtaps = 2;
2377 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2378 		wm_low.lb_size = lb_size;
2379 		wm_low.dram_channels = dram_channels;
2380 		wm_low.num_heads = num_heads;
2381 
2382 		/* set for high clocks */
2383 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2384 		/* set for low clocks */
2385 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2386 
2387 		/* possibly force display priority to high */
2388 		/* should really do this at mode validation time... */
2389 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2390 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2391 		    !dce6_check_latency_hiding(&wm_high) ||
2392 		    (rdev->disp_priority == 2)) {
2393 			DRM_DEBUG_KMS("force priority to high\n");
2394 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2395 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2396 		}
2397 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2398 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2399 		    !dce6_check_latency_hiding(&wm_low) ||
2400 		    (rdev->disp_priority == 2)) {
2401 			DRM_DEBUG_KMS("force priority to high\n");
2402 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2403 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2404 		}
2405 
2406 		a.full = dfixed_const(1000);
2407 		b.full = dfixed_const(mode->clock);
2408 		b.full = dfixed_div(b, a);
2409 		c.full = dfixed_const(latency_watermark_a);
2410 		c.full = dfixed_mul(c, b);
2411 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2412 		c.full = dfixed_div(c, a);
2413 		a.full = dfixed_const(16);
2414 		c.full = dfixed_div(c, a);
2415 		priority_a_mark = dfixed_trunc(c);
2416 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2417 
2418 		a.full = dfixed_const(1000);
2419 		b.full = dfixed_const(mode->clock);
2420 		b.full = dfixed_div(b, a);
2421 		c.full = dfixed_const(latency_watermark_b);
2422 		c.full = dfixed_mul(c, b);
2423 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2424 		c.full = dfixed_div(c, a);
2425 		a.full = dfixed_const(16);
2426 		c.full = dfixed_div(c, a);
2427 		priority_b_mark = dfixed_trunc(c);
2428 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2429 
2430 		/* Save number of lines the linebuffer leads before the scanout */
2431 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2432 	}
2433 
2434 	/* select wm A */
2435 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2436 	tmp = arb_control3;
2437 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2438 	tmp |= LATENCY_WATERMARK_MASK(1);
2439 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2440 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2441 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2442 		LATENCY_HIGH_WATERMARK(line_time)));
2443 	/* select wm B */
2444 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2445 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2446 	tmp |= LATENCY_WATERMARK_MASK(2);
2447 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2448 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2449 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2450 		LATENCY_HIGH_WATERMARK(line_time)));
2451 	/* restore original selection */
2452 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2453 
2454 	/* write the priority marks */
2455 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2456 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2457 
2458 	/* save values for DPM */
2459 	radeon_crtc->line_time = line_time;
2460 	radeon_crtc->wm_high = latency_watermark_a;
2461 	radeon_crtc->wm_low = latency_watermark_b;
2462 }
2463 
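/*
 * Crtcs are walked in pairs (i, i+1) because each pair shares a line
 * buffer: each crtc's watermarks are programmed with the share that
 * dce6_line_buffer_adjust() grants it given its partner's mode.
 */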
2464 void dce6_bandwidth_update(struct radeon_device *rdev)
2465 {
2466 	struct drm_display_mode *mode0 = NULL;
2467 	struct drm_display_mode *mode1 = NULL;
2468 	u32 num_heads = 0, lb_size;
2469 	int i;
2470 
2471 	if (!rdev->mode_info.mode_config_initialized)
2472 		return;
2473 
2474 	radeon_update_display_priority(rdev);
2475 
2476 	for (i = 0; i < rdev->num_crtc; i++) {
2477 		if (rdev->mode_info.crtcs[i]->base.enabled)
2478 			num_heads++;
2479 	}
2480 	for (i = 0; i < rdev->num_crtc; i += 2) {
2481 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2482 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2483 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2484 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2485 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2486 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2487 	}
2488 }
2489 
2490 /*
2491  * Core functions
2492  */
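/*
 * Build rdev->config.si.tile_mode_array, one entry per GB_TILE_MODE
 * register, from the per-family surface tiling presets below and write
 * the whole table to the hardware; entries not set explicitly stay 0.
 */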
2493 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2494 {
2495 	u32 *tile = rdev->config.si.tile_mode_array;
2496 	const u32 num_tile_mode_states =
2497 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2498 	u32 reg_offset, split_equal_to_row_size;
2499 
2500 	switch (rdev->config.si.mem_row_size_in_kb) {
2501 	case 1:
2502 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2503 		break;
2504 	case 2:
2505 	default:
2506 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2507 		break;
2508 	case 4:
2509 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2510 		break;
2511 	}
2512 
2513 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2514 		tile[reg_offset] = 0;
2515 
2516 	switch (rdev->family) {
2517 	case CHIP_TAHITI:
2518 	case CHIP_PITCAIRN:
2519 		/* non-AA compressed depth or any compressed stencil */
2520 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2522 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2523 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2524 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2525 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2527 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2528 		/* 2xAA/4xAA compressed depth only */
2529 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2532 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2533 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2534 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2537 		/* 8xAA compressed depth only */
2538 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2540 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2541 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2542 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2543 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2546 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2547 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2548 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2549 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2550 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2551 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2552 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2554 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2555 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2556 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2557 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2558 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2559 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2560 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2561 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2563 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2564 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2565 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2567 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2568 			   TILE_SPLIT(split_equal_to_row_size) |
2569 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2570 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2572 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2573 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2574 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2576 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2577 			   TILE_SPLIT(split_equal_to_row_size) |
2578 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2579 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2581 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2582 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2583 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2585 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 			   TILE_SPLIT(split_equal_to_row_size) |
2587 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2588 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2590 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2591 		/* 1D and 1D Array Surfaces */
2592 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2593 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2594 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2596 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2597 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2599 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2600 		/* Displayable maps. */
2601 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2604 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2605 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2606 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2608 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2609 		/* Display 8bpp. */
2610 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2611 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2613 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2614 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2615 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2617 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2618 		/* Display 16bpp. */
2619 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2621 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2622 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2623 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2624 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2627 		/* Display 32bpp. */
2628 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2631 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2632 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2633 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2635 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2636 		/* Thin. */
2637 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2638 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2639 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2640 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2641 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2642 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2644 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2645 		/* Thin 8 bpp. */
2646 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2647 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2648 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2650 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2651 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2653 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2654 		/* Thin 16 bpp. */
2655 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2658 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2659 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2660 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2662 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2663 		/* Thin 32 bpp. */
2664 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2667 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2668 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2669 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2671 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2672 		/* Thin 64 bpp. */
2673 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2675 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2676 			   TILE_SPLIT(split_equal_to_row_size) |
2677 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2678 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2681 		/* 8 bpp PRT. */
2682 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2684 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2686 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2687 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2688 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2689 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2690 		/* 16 bpp PRT */
2691 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2694 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2696 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2698 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2699 		/* 32 bpp PRT */
2700 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2702 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2703 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2704 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2705 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2707 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2708 		/* 64 bpp PRT */
2709 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2711 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2712 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2713 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2714 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2717 		/* 128 bpp PRT */
2718 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2719 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2720 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2721 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2722 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2723 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2725 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2726 
2727 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2728 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2729 		break;
2730 
2731 	case CHIP_VERDE:
2732 	case CHIP_OLAND:
2733 	case CHIP_HAINAN:
2734 		/* non-AA compressed depth or any compressed stencil */
2735 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2739 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2740 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2743 		/* 2xAA/4xAA compressed depth only */
2744 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2746 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2748 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2749 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2752 		/* 8xAA compressed depth only */
2753 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2755 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2757 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2758 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2761 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2762 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2764 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2765 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2766 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2767 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2769 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2770 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2771 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2772 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2773 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2775 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2776 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2778 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2779 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2780 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2782 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			   TILE_SPLIT(split_equal_to_row_size) |
2784 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2785 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2787 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2788 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2789 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2790 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2791 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2792 			   TILE_SPLIT(split_equal_to_row_size) |
2793 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2794 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2796 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2797 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2798 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2799 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2800 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801 			   TILE_SPLIT(split_equal_to_row_size) |
2802 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2803 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2805 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2806 		/* 1D and 1D Array Surfaces */
2807 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2808 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2809 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2811 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2812 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2814 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2815 		/* Displayable maps. */
2816 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2817 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2818 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2820 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2821 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2824 		/* Display 8bpp. */
2825 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2829 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2830 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2832 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2833 		/* Display 16bpp. */
2834 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2838 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2839 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2841 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2842 		/* Display 32bpp. */
2843 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2844 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2846 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2847 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2848 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2851 		/* Thin. */
2852 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2853 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2854 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2855 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2856 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2857 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2860 		/* Thin 8 bpp. */
2861 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2862 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2863 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2864 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2865 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2866 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2869 		/* Thin 16 bpp. */
2870 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2872 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2874 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2875 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2876 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2877 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2878 		/* Thin 32 bpp. */
2879 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2881 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2882 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2883 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2884 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2887 		/* Thin 64 bpp. */
2888 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2890 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2891 			   TILE_SPLIT(split_equal_to_row_size) |
2892 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2893 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2895 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2896 		/* 8 bpp PRT. */
2897 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2899 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2900 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2901 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2902 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2903 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2904 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2905 		/* 16 bpp PRT */
2906 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2908 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2909 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2910 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2911 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2912 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2913 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2914 		/* 32 bpp PRT */
2915 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2917 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2918 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2919 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2920 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2923 		/* 64 bpp PRT */
2924 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2925 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2926 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2927 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2928 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2929 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2930 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2931 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2932 		/* 128 bpp PRT */
2933 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2935 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2936 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2937 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2938 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2941 
2942 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2943 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2944 		break;
2945 
2946 	default:
2947 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2948 	}
2949 }
2950 
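/*
 * Route subsequent register accesses via GRBM_GFX_INDEX: pass a real
 * se/sh index to target one shader engine/array, or 0xffffffff in
 * either argument to broadcast writes to all of them.
 */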
2951 static void si_select_se_sh(struct radeon_device *rdev,
2952 			    u32 se_num, u32 sh_num)
2953 {
2954 	u32 data = INSTANCE_BROADCAST_WRITES;
2955 
2956 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2957 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2958 	else if (se_num == 0xffffffff)
2959 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2960 	else if (sh_num == 0xffffffff)
2961 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2962 	else
2963 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2964 	WREG32(GRBM_GFX_INDEX, data);
2965 }
2966 
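/*
 * Build a mask of bit_width consecutive low bits, e.g.
 * si_create_bitmask(3) == 0x7.  The shift-and-or loop also yields
 * 0xffffffff for a width of 32, which (1 << 32) - 1 would not.
 */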
2967 static u32 si_create_bitmask(u32 bit_width)
2968 {
2969 	u32 i, mask = 0;
2970 
2971 	for (i = 0; i < bit_width; i++) {
2972 		mask <<= 1;
2973 		mask |= 1;
2974 	}
2975 	return mask;
2976 }
2977 
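/*
 * Return the bitmask of usable CUs in the currently selected shader
 * array: the hardware (CC_...) and driver (GC_USER_...) inactive-CU
 * fields are OR'd together and inverted, limited to cu_per_sh bits.
 */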
2978 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2979 {
2980 	u32 data, mask;
2981 
2982 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2983 	if (data & 1)
2984 		data &= INACTIVE_CUS_MASK;
2985 	else
2986 		data = 0;
2987 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2988 
2989 	data >>= INACTIVE_CUS_SHIFT;
2990 
2991 	mask = si_create_bitmask(cu_per_sh);
2992 
2993 	return ~data & mask;
2994 }
2995 
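/*
 * For every shader array, clear the first active CU's bit in
 * SPI_STATIC_THREAD_MGMT_3, then restore broadcast indexing.
 */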
2996 static void si_setup_spi(struct radeon_device *rdev,
2997 			 u32 se_num, u32 sh_per_se,
2998 			 u32 cu_per_sh)
2999 {
3000 	int i, j, k;
3001 	u32 data, mask, active_cu;
3002 
3003 	for (i = 0; i < se_num; i++) {
3004 		for (j = 0; j < sh_per_se; j++) {
3005 			si_select_se_sh(rdev, i, j);
3006 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3007 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3008 
3009 			mask = 1;
3010 			for (k = 0; k < 16; k++) {
3011 				mask = 1 << k; /* check CU bit k */
3012 				if (active_cu & mask) {
3013 					data &= ~mask;
3014 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3015 					break;
3016 				}
3017 			}
3018 		}
3019 	}
3020 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3021 }
3022 
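/*
 * Like si_get_cu_enabled() but for render backends, and with the
 * opposite polarity: returns the mask of *disabled* RBs in the
 * currently selected shader array.
 */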
3023 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3024 			      u32 max_rb_num_per_se,
3025 			      u32 sh_per_se)
3026 {
3027 	u32 data, mask;
3028 
3029 	data = RREG32(CC_RB_BACKEND_DISABLE);
3030 	if (data & 1)
3031 		data &= BACKEND_DISABLE_MASK;
3032 	else
3033 		data = 0;
3034 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3035 
3036 	data >>= BACKEND_DISABLE_SHIFT;
3037 
3038 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3039 
3040 	return data & mask;
3041 }
3042 
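/*
 * Collect the per-SH disabled-RB masks into one bitmap, invert it into
 * rdev->config.si.backend_enable_mask, and program PA_SC_RASTER_CONFIG
 * with a packing mode for each RB pair (2 bits per pair) based on
 * which of the two RBs survived.
 */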
3043 static void si_setup_rb(struct radeon_device *rdev,
3044 			u32 se_num, u32 sh_per_se,
3045 			u32 max_rb_num_per_se)
3046 {
3047 	int i, j;
3048 	u32 data, mask;
3049 	u32 disabled_rbs = 0;
3050 	u32 enabled_rbs = 0;
3051 
3052 	for (i = 0; i < se_num; i++) {
3053 		for (j = 0; j < sh_per_se; j++) {
3054 			si_select_se_sh(rdev, i, j);
3055 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3056 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3057 		}
3058 	}
3059 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3060 
3061 	mask = 1;
3062 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3063 		if (!(disabled_rbs & mask))
3064 			enabled_rbs |= mask;
3065 		mask <<= 1;
3066 	}
3067 
3068 	rdev->config.si.backend_enable_mask = enabled_rbs;
3069 
3070 	for (i = 0; i < se_num; i++) {
3071 		si_select_se_sh(rdev, i, 0xffffffff);
3072 		data = 0;
3073 		for (j = 0; j < sh_per_se; j++) {
3074 			switch (enabled_rbs & 3) {
3075 			case 1:
3076 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3077 				break;
3078 			case 2:
3079 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3080 				break;
3081 			case 3:
3082 			default:
3083 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3084 				break;
3085 			}
3086 			enabled_rbs >>= 2;
3087 		}
3088 		WREG32(PA_SC_RASTER_CONFIG, data);
3089 	}
3090 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3091 }
3092 
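/*
 * Per-family limits (shader engines, tile pipes, CUs, backends) and the
 * golden GB_ADDR_CONFIG value that the rest of the gfx setup is sized
 * from.
 */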
3093 static void si_gpu_init(struct radeon_device *rdev)
3094 {
3095 	u32 gb_addr_config = 0;
3096 	u32 mc_shared_chmap, mc_arb_ramcfg;
3097 	u32 sx_debug_1;
3098 	u32 hdp_host_path_cntl;
3099 	u32 tmp;
3100 	int i, j;
3101 
3102 	switch (rdev->family) {
3103 	case CHIP_TAHITI:
3104 		rdev->config.si.max_shader_engines = 2;
3105 		rdev->config.si.max_tile_pipes = 12;
3106 		rdev->config.si.max_cu_per_sh = 8;
3107 		rdev->config.si.max_sh_per_se = 2;
3108 		rdev->config.si.max_backends_per_se = 4;
3109 		rdev->config.si.max_texture_channel_caches = 12;
3110 		rdev->config.si.max_gprs = 256;
3111 		rdev->config.si.max_gs_threads = 32;
3112 		rdev->config.si.max_hw_contexts = 8;
3113 
3114 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3115 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3116 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3117 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3118 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3119 		break;
3120 	case CHIP_PITCAIRN:
3121 		rdev->config.si.max_shader_engines = 2;
3122 		rdev->config.si.max_tile_pipes = 8;
3123 		rdev->config.si.max_cu_per_sh = 5;
3124 		rdev->config.si.max_sh_per_se = 2;
3125 		rdev->config.si.max_backends_per_se = 4;
3126 		rdev->config.si.max_texture_channel_caches = 8;
3127 		rdev->config.si.max_gprs = 256;
3128 		rdev->config.si.max_gs_threads = 32;
3129 		rdev->config.si.max_hw_contexts = 8;
3130 
3131 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3132 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3133 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3134 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3135 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3136 		break;
3137 	case CHIP_VERDE:
3138 	default:
3139 		rdev->config.si.max_shader_engines = 1;
3140 		rdev->config.si.max_tile_pipes = 4;
3141 		rdev->config.si.max_cu_per_sh = 5;
3142 		rdev->config.si.max_sh_per_se = 2;
3143 		rdev->config.si.max_backends_per_se = 4;
3144 		rdev->config.si.max_texture_channel_caches = 4;
3145 		rdev->config.si.max_gprs = 256;
3146 		rdev->config.si.max_gs_threads = 32;
3147 		rdev->config.si.max_hw_contexts = 8;
3148 
3149 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3150 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3151 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3152 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3153 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3154 		break;
3155 	case CHIP_OLAND:
3156 		rdev->config.si.max_shader_engines = 1;
3157 		rdev->config.si.max_tile_pipes = 4;
3158 		rdev->config.si.max_cu_per_sh = 6;
3159 		rdev->config.si.max_sh_per_se = 1;
3160 		rdev->config.si.max_backends_per_se = 2;
3161 		rdev->config.si.max_texture_channel_caches = 4;
3162 		rdev->config.si.max_gprs = 256;
3163 		rdev->config.si.max_gs_threads = 16;
3164 		rdev->config.si.max_hw_contexts = 8;
3165 
3166 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3167 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3168 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3169 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3170 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3171 		break;
3172 	case CHIP_HAINAN:
3173 		rdev->config.si.max_shader_engines = 1;
3174 		rdev->config.si.max_tile_pipes = 4;
3175 		rdev->config.si.max_cu_per_sh = 5;
3176 		rdev->config.si.max_sh_per_se = 1;
3177 		rdev->config.si.max_backends_per_se = 1;
3178 		rdev->config.si.max_texture_channel_caches = 2;
3179 		rdev->config.si.max_gprs = 256;
3180 		rdev->config.si.max_gs_threads = 16;
3181 		rdev->config.si.max_hw_contexts = 8;
3182 
3183 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3184 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3185 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3186 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3187 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3188 		break;
3189 	}
3190 
3191 	/* Initialize HDP */
3192 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3193 		WREG32((0x2c14 + j), 0x00000000);
3194 		WREG32((0x2c18 + j), 0x00000000);
3195 		WREG32((0x2c1c + j), 0x00000000);
3196 		WREG32((0x2c20 + j), 0x00000000);
3197 		WREG32((0x2c24 + j), 0x00000000);
3198 	}
3199 
3200 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3201 	WREG32(SRBM_INT_CNTL, 1);
3202 	WREG32(SRBM_INT_ACK, 1);
3203 
3204 	evergreen_fix_pci_max_read_req_size(rdev);
3205 
3206 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3207 
3208 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3209 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3210 
3211 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3212 	rdev->config.si.mem_max_burst_length_bytes = 256;
3213 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3214 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3215 	if (rdev->config.si.mem_row_size_in_kb > 4)
3216 		rdev->config.si.mem_row_size_in_kb = 4;
3217 	/* XXX use MC settings? */
3218 	rdev->config.si.shader_engine_tile_size = 32;
3219 	rdev->config.si.num_gpus = 1;
3220 	rdev->config.si.multi_gpu_tile_size = 64;
3221 
3222 	/* fix up row size */
3223 	gb_addr_config &= ~ROW_SIZE_MASK;
3224 	switch (rdev->config.si.mem_row_size_in_kb) {
3225 	case 1:
3226 	default:
3227 		gb_addr_config |= ROW_SIZE(0);
3228 		break;
3229 	case 2:
3230 		gb_addr_config |= ROW_SIZE(1);
3231 		break;
3232 	case 4:
3233 		gb_addr_config |= ROW_SIZE(2);
3234 		break;
3235 	}
3236 
3237 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3238 	 * not have bank info, so create a custom tiling dword.
3239 	 * bits 3:0   num_pipes
3240 	 * bits 7:4   num_banks
3241 	 * bits 11:8  group_size
3242 	 * bits 15:12 row_size
3243 	 */
3244 	rdev->config.si.tile_config = 0;
3245 	switch (rdev->config.si.num_tile_pipes) {
3246 	case 1:
3247 		rdev->config.si.tile_config |= (0 << 0);
3248 		break;
3249 	case 2:
3250 		rdev->config.si.tile_config |= (1 << 0);
3251 		break;
3252 	case 4:
3253 		rdev->config.si.tile_config |= (2 << 0);
3254 		break;
3255 	case 8:
3256 	default:
3257 		/* XXX what about 12? */
3258 		rdev->config.si.tile_config |= (3 << 0);
3259 		break;
3260 	}
3261 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3262 	case 0: /* four banks */
3263 		rdev->config.si.tile_config |= 0 << 4;
3264 		break;
3265 	case 1: /* eight banks */
3266 		rdev->config.si.tile_config |= 1 << 4;
3267 		break;
3268 	case 2: /* sixteen banks */
3269 	default:
3270 		rdev->config.si.tile_config |= 2 << 4;
3271 		break;
3272 	}
3273 	rdev->config.si.tile_config |=
3274 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3275 	rdev->config.si.tile_config |=
3276 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
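	/* Worked example of the tiling dword, assuming the pipe-interleave
	 * field reads back as 0: a Verde part with 4 tile pipes (bits 3:0 =
	 * 2), eight banks (bits 7:4 = 1) and a 4 KB row (bits 15:12 = 2 via
	 * ROW_SIZE(2) above) packs to tile_config = 0x2012.
	 */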
3277 
3278 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3279 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3280 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3281 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3282 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3283 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3284 	if (rdev->has_uvd) {
3285 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3286 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3287 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3288 	}
3289 
3290 	si_tiling_mode_table_init(rdev);
3291 
3292 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3293 		    rdev->config.si.max_sh_per_se,
3294 		    rdev->config.si.max_backends_per_se);
3295 
3296 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3297 		     rdev->config.si.max_sh_per_se,
3298 		     rdev->config.si.max_cu_per_sh);
3299 
3300 	rdev->config.si.active_cus = 0;
3301 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3302 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3303 			rdev->config.si.active_cus +=
3304 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3305 		}
3306 	}
3307 
3308 	/* set HW defaults for 3D engine */
3309 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3310 				     ROQ_IB2_START(0x2b)));
3311 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3312 
3313 	sx_debug_1 = RREG32(SX_DEBUG_1);
3314 	WREG32(SX_DEBUG_1, sx_debug_1);
3315 
3316 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3317 
3318 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3319 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3320 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3321 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3322 
3323 	WREG32(VGT_NUM_INSTANCES, 1);
3324 
3325 	WREG32(CP_PERFMON_CNTL, 0);
3326 
3327 	WREG32(SQ_CONFIG, 0);
3328 
3329 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3330 					  FORCE_EOV_MAX_REZ_CNT(255)));
3331 
3332 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3333 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3334 
3335 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3336 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3337 
3338 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3339 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3340 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3341 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3342 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3343 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3344 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3345 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3346 
3347 	tmp = RREG32(HDP_MISC_CNTL);
3348 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3349 	WREG32(HDP_MISC_CNTL, tmp);
3350 
3351 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3352 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3353 
3354 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3355 
3356 	udelay(50);
3357 }
3358 
3359 /*
3360  * GPU scratch registers helpers function.
3361  */
3362 static void si_scratch_init(struct radeon_device *rdev)
3363 {
3364 	int i;
3365 
3366 	rdev->scratch.num_reg = 7;
3367 	rdev->scratch.reg_base = SCRATCH_REG0;
3368 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3369 		rdev->scratch.free[i] = true;
3370 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3371 	}
3372 }
3373 
3374 void si_fence_ring_emit(struct radeon_device *rdev,
3375 			struct radeon_fence *fence)
3376 {
3377 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3378 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3379 
3380 	/* flush read cache over gart */
3381 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3382 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3383 	radeon_ring_write(ring, 0);
3384 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3385 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3386 			  PACKET3_TC_ACTION_ENA |
3387 			  PACKET3_SH_KCACHE_ACTION_ENA |
3388 			  PACKET3_SH_ICACHE_ACTION_ENA);
3389 	radeon_ring_write(ring, 0xFFFFFFFF);
3390 	radeon_ring_write(ring, 0);
3391 	radeon_ring_write(ring, 10); /* poll interval */
3392 	/* EVENT_WRITE_EOP - flush caches, send int */
3393 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3394 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3395 	radeon_ring_write(ring, lower_32_bits(addr));
3396 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3397 	radeon_ring_write(ring, fence->seq);
3398 	radeon_ring_write(ring, 0);
3399 }
3400 
3401 /*
3402  * IB stuff
3403  */
3404 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3405 {
3406 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3407 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3408 	u32 header;
3409 
3410 	if (ib->is_const_ib) {
3411 		/* set switch buffer packet before const IB */
3412 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3413 		radeon_ring_write(ring, 0);
3414 
3415 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3416 	} else {
3417 		u32 next_rptr;
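		/* next_rptr must point past every dword emitted after the
		 * write itself: 3 dwords for the SET_CONFIG_REG form (or 5
		 * for WRITE_DATA), 4 for the INDIRECT_BUFFER packet, and 8
		 * for the cache-flush tail (3 for SET_CONFIG_REG + 5 for
		 * SURFACE_SYNC) added for non-const IBs below, hence the
		 * 3 + 4 + 8 and 5 + 4 + 8 sums.
		 */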
3418 		if (ring->rptr_save_reg) {
3419 			next_rptr = ring->wptr + 3 + 4 + 8;
3420 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3421 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3422 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3423 			radeon_ring_write(ring, next_rptr);
3424 		} else if (rdev->wb.enabled) {
3425 			next_rptr = ring->wptr + 5 + 4 + 8;
3426 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3427 			radeon_ring_write(ring, (1 << 8));
3428 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3429 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3430 			radeon_ring_write(ring, next_rptr);
3431 		}
3432 
3433 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3434 	}
3435 
3436 	radeon_ring_write(ring, header);
3437 	radeon_ring_write(ring,
3438 #ifdef __BIG_ENDIAN
3439 			  (2 << 0) |
3440 #endif
3441 			  (ib->gpu_addr & 0xFFFFFFFC));
3442 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3443 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3444 
3445 	if (!ib->is_const_ib) {
3446 		/* flush read cache over gart for this vmid */
3447 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3448 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3449 		radeon_ring_write(ring, vm_id);
3450 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3451 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3452 				  PACKET3_TC_ACTION_ENA |
3453 				  PACKET3_SH_KCACHE_ACTION_ENA |
3454 				  PACKET3_SH_ICACHE_ACTION_ENA);
3455 		radeon_ring_write(ring, 0xFFFFFFFF);
3456 		radeon_ring_write(ring, 0);
3457 		radeon_ring_write(ring, 10); /* poll interval */
3458 	}
3459 }
3460 
3461 /*
3462  * CP.
3463  */
3464 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3465 {
3466 	if (enable)
3467 		WREG32(CP_ME_CNTL, 0);
3468 	else {
3469 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3470 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3471 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3472 		WREG32(SCRATCH_UMSK, 0);
3473 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3474 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3475 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3476 	}
3477 	udelay(50);
3478 }
3479 
3480 static int si_cp_load_microcode(struct radeon_device *rdev)
3481 {
3482 	int i;
3483 
3484 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3485 		return -EINVAL;
3486 
3487 	si_cp_enable(rdev, false);
3488 
3489 	if (rdev->new_fw) {
3490 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3491 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3492 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3493 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3494 		const struct gfx_firmware_header_v1_0 *me_hdr =
3495 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3496 		const __le32 *fw_data;
3497 		u32 fw_size;
3498 
3499 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3500 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3501 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3502 
3503 		/* PFP */
3504 		fw_data = (const __le32 *)
3505 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3506 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3507 		WREG32(CP_PFP_UCODE_ADDR, 0);
3508 		for (i = 0; i < fw_size; i++)
3509 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3510 		WREG32(CP_PFP_UCODE_ADDR, 0);
3511 
3512 		/* CE */
3513 		fw_data = (const __le32 *)
3514 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3515 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3516 		WREG32(CP_CE_UCODE_ADDR, 0);
3517 		for (i = 0; i < fw_size; i++)
3518 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3519 		WREG32(CP_CE_UCODE_ADDR, 0);
3520 
3521 		/* ME */
3522 		fw_data = (const __le32 *)
3523 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3524 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3525 		WREG32(CP_ME_RAM_WADDR, 0);
3526 		for (i = 0; i < fw_size; i++)
3527 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3528 		WREG32(CP_ME_RAM_WADDR, 0);
3529 	} else {
3530 		const __be32 *fw_data;
3531 
3532 		/* PFP */
3533 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3534 		WREG32(CP_PFP_UCODE_ADDR, 0);
3535 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3536 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3537 		WREG32(CP_PFP_UCODE_ADDR, 0);
3538 
3539 		/* CE */
3540 		fw_data = (const __be32 *)rdev->ce_fw->data;
3541 		WREG32(CP_CE_UCODE_ADDR, 0);
3542 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3543 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3544 		WREG32(CP_CE_UCODE_ADDR, 0);
3545 
3546 		/* ME */
3547 		fw_data = (const __be32 *)rdev->me_fw->data;
3548 		WREG32(CP_ME_RAM_WADDR, 0);
3549 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3550 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3551 		WREG32(CP_ME_RAM_WADDR, 0);
3552 	}
3553 
3554 	WREG32(CP_PFP_UCODE_ADDR, 0);
3555 	WREG32(CP_CE_UCODE_ADDR, 0);
3556 	WREG32(CP_ME_RAM_WADDR, 0);
3557 	WREG32(CP_ME_RAM_RADDR, 0);
3558 	return 0;
3559 }
3560 
3561 static int si_cp_start(struct radeon_device *rdev)
3562 {
3563 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3564 	int r, i;
3565 
3566 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3567 	if (r) {
3568 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3569 		return r;
3570 	}
3571 	/* init the CP */
3572 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3573 	radeon_ring_write(ring, 0x1);
3574 	radeon_ring_write(ring, 0x0);
3575 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3576 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3577 	radeon_ring_write(ring, 0);
3578 	radeon_ring_write(ring, 0);
3579 
3580 	/* init the CE partitions */
3581 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3582 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3583 	radeon_ring_write(ring, 0xc000);
3584 	radeon_ring_write(ring, 0xe000);
3585 	radeon_ring_unlock_commit(rdev, ring, false);
3586 
3587 	si_cp_enable(rdev, true);
3588 
3589 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3590 	if (r) {
3591 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3592 		return r;
3593 	}
3594 
3595 	/* setup clear context state */
3596 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3597 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3598 
3599 	for (i = 0; i < si_default_size; i++)
3600 		radeon_ring_write(ring, si_default_state[i]);
3601 
3602 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3603 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3604 
3605 	/* set clear context state */
3606 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3607 	radeon_ring_write(ring, 0);
3608 
3609 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3610 	radeon_ring_write(ring, 0x00000316);
3611 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3612 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3613 
3614 	radeon_ring_unlock_commit(rdev, ring, false);
3615 
3616 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3617 		ring = &rdev->ring[i];
3618 		r = radeon_ring_lock(rdev, ring, 2);
3619 		if (r) {
3620 			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3621 			return r;
3622 		}
3623 
3624 		/* clear the compute context state */
3625 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3626 		radeon_ring_write(ring, 0);
3627 
3628 		radeon_ring_unlock_commit(rdev, ring, false);
3629 	}
3630 
3631 	return 0;
3632 }
3633 
3634 static void si_cp_fini(struct radeon_device *rdev)
3635 {
3636 	struct radeon_ring *ring;
3637 	si_cp_enable(rdev, false);
3638 
3639 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3640 	radeon_ring_fini(rdev, ring);
3641 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3642 
3643 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3644 	radeon_ring_fini(rdev, ring);
3645 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3646 
3647 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3648 	radeon_ring_fini(rdev, ring);
3649 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3650 }
3651 
3652 static int si_cp_resume(struct radeon_device *rdev)
3653 {
3654 	struct radeon_ring *ring;
3655 	u32 tmp;
3656 	u32 rb_bufsz;
3657 	int r;
3658 
3659 	si_enable_gui_idle_interrupt(rdev, false);
3660 
3661 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3662 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3663 
3664 	/* Set the write pointer delay */
3665 	WREG32(CP_RB_WPTR_DELAY, 0);
3666 
3667 	WREG32(CP_DEBUG, 0);
3668 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3669 
3670 	/* ring 0 - compute and gfx */
3671 	/* Set ring buffer size */
3672 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3673 	rb_bufsz = order_base_2(ring->ring_size / 8);
3674 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
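	/* e.g. a 1 MiB ring gives rb_bufsz = order_base_2(1048576 / 8) = 17,
	 * and with 4 KB GPU pages the rptr-report granularity field is
	 * order_base_2(4096 / 8) = 9, so tmp = (9 << 8) | 17 = 0x911 before
	 * any byte-swap flags.
	 */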
3675 #ifdef __BIG_ENDIAN
3676 	tmp |= BUF_SWAP_32BIT;
3677 #endif
3678 	WREG32(CP_RB0_CNTL, tmp);
3679 
3680 	/* Initialize the ring buffer's read and write pointers */
3681 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3682 	ring->wptr = 0;
3683 	WREG32(CP_RB0_WPTR, ring->wptr);
3684 
3685 	/* set the wb address whether it's enabled or not */
3686 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3687 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3688 
3689 	if (rdev->wb.enabled)
3690 		WREG32(SCRATCH_UMSK, 0xff);
3691 	else {
3692 		tmp |= RB_NO_UPDATE;
3693 		WREG32(SCRATCH_UMSK, 0);
3694 	}
3695 
3696 	mdelay(1);
3697 	WREG32(CP_RB0_CNTL, tmp);
3698 
3699 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3700 
3701 	/* ring1  - compute only */
3702 	/* Set ring buffer size */
3703 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3704 	rb_bufsz = order_base_2(ring->ring_size / 8);
3705 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3706 #ifdef __BIG_ENDIAN
3707 	tmp |= BUF_SWAP_32BIT;
3708 #endif
3709 	WREG32(CP_RB1_CNTL, tmp);
3710 
3711 	/* Initialize the ring buffer's read and write pointers */
3712 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3713 	ring->wptr = 0;
3714 	WREG32(CP_RB1_WPTR, ring->wptr);
3715 
3716 	/* set the wb address whether it's enabled or not */
3717 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3718 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3719 
3720 	mdelay(1);
3721 	WREG32(CP_RB1_CNTL, tmp);
3722 
3723 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3724 
3725 	/* ring2 - compute only */
3726 	/* Set ring buffer size */
3727 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3728 	rb_bufsz = order_base_2(ring->ring_size / 8);
3729 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3730 #ifdef __BIG_ENDIAN
3731 	tmp |= BUF_SWAP_32BIT;
3732 #endif
3733 	WREG32(CP_RB2_CNTL, tmp);
3734 
3735 	/* Initialize the ring buffer's read and write pointers */
3736 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3737 	ring->wptr = 0;
3738 	WREG32(CP_RB2_WPTR, ring->wptr);
3739 
3740 	/* set the wb address whether it's enabled or not */
3741 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3742 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3743 
3744 	mdelay(1);
3745 	WREG32(CP_RB2_CNTL, tmp);
3746 
3747 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3748 
3749 	/* start the rings */
3750 	si_cp_start(rdev);
3751 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3752 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3753 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3754 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3755 	if (r) {
3756 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3757 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3758 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3759 		return r;
3760 	}
3761 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3762 	if (r) {
3763 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3764 	}
3765 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3766 	if (r) {
3767 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3768 	}
3769 
3770 	si_enable_gui_idle_interrupt(rdev, true);
3771 
3772 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3773 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3774 
3775 	return 0;
3776 }
3777 
3778 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3779 {
3780 	u32 reset_mask = 0;
3781 	u32 tmp;
3782 
3783 	/* GRBM_STATUS */
3784 	tmp = RREG32(GRBM_STATUS);
3785 	if (tmp & (PA_BUSY | SC_BUSY |
3786 		   BCI_BUSY | SX_BUSY |
3787 		   TA_BUSY | VGT_BUSY |
3788 		   DB_BUSY | CB_BUSY |
3789 		   GDS_BUSY | SPI_BUSY |
3790 		   IA_BUSY | IA_BUSY_NO_DMA))
3791 		reset_mask |= RADEON_RESET_GFX;
3792 
3793 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3794 		   CP_BUSY | CP_COHERENCY_BUSY))
3795 		reset_mask |= RADEON_RESET_CP;
3796 
3797 	if (tmp & GRBM_EE_BUSY)
3798 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3799 
3800 	/* GRBM_STATUS2 */
3801 	tmp = RREG32(GRBM_STATUS2);
3802 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3803 		reset_mask |= RADEON_RESET_RLC;
3804 
3805 	/* DMA_STATUS_REG 0 */
3806 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3807 	if (!(tmp & DMA_IDLE))
3808 		reset_mask |= RADEON_RESET_DMA;
3809 
3810 	/* DMA_STATUS_REG 1 */
3811 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3812 	if (!(tmp & DMA_IDLE))
3813 		reset_mask |= RADEON_RESET_DMA1;
3814 
3815 	/* SRBM_STATUS2 */
3816 	tmp = RREG32(SRBM_STATUS2);
3817 	if (tmp & DMA_BUSY)
3818 		reset_mask |= RADEON_RESET_DMA;
3819 
3820 	if (tmp & DMA1_BUSY)
3821 		reset_mask |= RADEON_RESET_DMA1;
3822 
3823 	/* SRBM_STATUS */
3824 	tmp = RREG32(SRBM_STATUS);
3825 
3826 	if (tmp & IH_BUSY)
3827 		reset_mask |= RADEON_RESET_IH;
3828 
3829 	if (tmp & SEM_BUSY)
3830 		reset_mask |= RADEON_RESET_SEM;
3831 
3832 	if (tmp & GRBM_RQ_PENDING)
3833 		reset_mask |= RADEON_RESET_GRBM;
3834 
3835 	if (tmp & VMC_BUSY)
3836 		reset_mask |= RADEON_RESET_VMC;
3837 
3838 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3839 		   MCC_BUSY | MCD_BUSY))
3840 		reset_mask |= RADEON_RESET_MC;
3841 
3842 	if (evergreen_is_display_hung(rdev))
3843 		reset_mask |= RADEON_RESET_DISPLAY;
3844 
3845 	/* VM_L2_STATUS */
3846 	tmp = RREG32(VM_L2_STATUS);
3847 	if (tmp & L2_BUSY)
3848 		reset_mask |= RADEON_RESET_VMC;
3849 
3850 	/* Skip MC reset as it's most likely not hung, just busy */
3851 	if (reset_mask & RADEON_RESET_MC) {
3852 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3853 		reset_mask &= ~RADEON_RESET_MC;
3854 	}
3855 
3856 	return reset_mask;
3857 }
3858 
3859 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3860 {
3861 	struct evergreen_mc_save save;
3862 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3863 	u32 tmp;
3864 
3865 	if (reset_mask == 0)
3866 		return;
3867 
3868 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3869 
3870 	evergreen_print_gpu_status_regs(rdev);
3871 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3872 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3873 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3874 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3875 
3876 	/* disable PG/CG */
3877 	si_fini_pg(rdev);
3878 	si_fini_cg(rdev);
3879 
3880 	/* stop the rlc */
3881 	si_rlc_stop(rdev);
3882 
3883 	/* Disable CP parsing/prefetching */
3884 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3885 
3886 	if (reset_mask & RADEON_RESET_DMA) {
3887 		/* dma0 */
3888 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3889 		tmp &= ~DMA_RB_ENABLE;
3890 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3891 	}
3892 	if (reset_mask & RADEON_RESET_DMA1) {
3893 		/* dma1 */
3894 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3895 		tmp &= ~DMA_RB_ENABLE;
3896 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3897 	}
3898 
3899 	udelay(50);
3900 
3901 	evergreen_mc_stop(rdev, &save);
3902 	if (evergreen_mc_wait_for_idle(rdev)) {
3903 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3904 	}
3905 
3906 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3907 		grbm_soft_reset = SOFT_RESET_CB |
3908 			SOFT_RESET_DB |
3909 			SOFT_RESET_GDS |
3910 			SOFT_RESET_PA |
3911 			SOFT_RESET_SC |
3912 			SOFT_RESET_BCI |
3913 			SOFT_RESET_SPI |
3914 			SOFT_RESET_SX |
3915 			SOFT_RESET_TC |
3916 			SOFT_RESET_TA |
3917 			SOFT_RESET_VGT |
3918 			SOFT_RESET_IA;
3919 	}
3920 
3921 	if (reset_mask & RADEON_RESET_CP) {
3922 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3923 
3924 		srbm_soft_reset |= SOFT_RESET_GRBM;
3925 	}
3926 
3927 	if (reset_mask & RADEON_RESET_DMA)
3928 		srbm_soft_reset |= SOFT_RESET_DMA;
3929 
3930 	if (reset_mask & RADEON_RESET_DMA1)
3931 		srbm_soft_reset |= SOFT_RESET_DMA1;
3932 
3933 	if (reset_mask & RADEON_RESET_DISPLAY)
3934 		srbm_soft_reset |= SOFT_RESET_DC;
3935 
3936 	if (reset_mask & RADEON_RESET_RLC)
3937 		grbm_soft_reset |= SOFT_RESET_RLC;
3938 
3939 	if (reset_mask & RADEON_RESET_SEM)
3940 		srbm_soft_reset |= SOFT_RESET_SEM;
3941 
3942 	if (reset_mask & RADEON_RESET_IH)
3943 		srbm_soft_reset |= SOFT_RESET_IH;
3944 
3945 	if (reset_mask & RADEON_RESET_GRBM)
3946 		srbm_soft_reset |= SOFT_RESET_GRBM;
3947 
3948 	if (reset_mask & RADEON_RESET_VMC)
3949 		srbm_soft_reset |= SOFT_RESET_VMC;
3950 
3951 	if (reset_mask & RADEON_RESET_MC)
3952 		srbm_soft_reset |= SOFT_RESET_MC;
3953 
3954 	if (grbm_soft_reset) {
3955 		tmp = RREG32(GRBM_SOFT_RESET);
3956 		tmp |= grbm_soft_reset;
3957 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3958 		WREG32(GRBM_SOFT_RESET, tmp);
3959 		tmp = RREG32(GRBM_SOFT_RESET);
3960 
3961 		udelay(50);
3962 
3963 		tmp &= ~grbm_soft_reset;
3964 		WREG32(GRBM_SOFT_RESET, tmp);
3965 		tmp = RREG32(GRBM_SOFT_RESET);
3966 	}
3967 
3968 	if (srbm_soft_reset) {
3969 		tmp = RREG32(SRBM_SOFT_RESET);
3970 		tmp |= srbm_soft_reset;
3971 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3972 		WREG32(SRBM_SOFT_RESET, tmp);
3973 		tmp = RREG32(SRBM_SOFT_RESET);
3974 
3975 		udelay(50);
3976 
3977 		tmp &= ~srbm_soft_reset;
3978 		WREG32(SRBM_SOFT_RESET, tmp);
3979 		tmp = RREG32(SRBM_SOFT_RESET);
3980 	}
3981 
3982 	/* Wait a little for things to settle down */
3983 	udelay(50);
3984 
3985 	evergreen_mc_resume(rdev, &save);
3986 	udelay(50);
3987 
3988 	evergreen_print_gpu_status_regs(rdev);
3989 }
3990 
3991 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3992 {
3993 	u32 tmp, i;
3994 
3995 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3996 	tmp |= SPLL_BYPASS_EN;
3997 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3998 
3999 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4000 	tmp |= SPLL_CTLREQ_CHG;
4001 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4002 
4003 	for (i = 0; i < rdev->usec_timeout; i++) {
4004 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
4005 			break;
4006 		udelay(1);
4007 	}
4008 
4009 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4010 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4011 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4012 
4013 	tmp = RREG32(MPLL_CNTL_MODE);
4014 	tmp &= ~MPLL_MCLK_SEL;
4015 	WREG32(MPLL_CNTL_MODE, tmp);
4016 }
4017 
4018 static void si_spll_powerdown(struct radeon_device *rdev)
4019 {
4020 	u32 tmp;
4021 
4022 	tmp = RREG32(SPLL_CNTL_MODE);
4023 	tmp |= SPLL_SW_DIR_CONTROL;
4024 	WREG32(SPLL_CNTL_MODE, tmp);
4025 
4026 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4027 	tmp |= SPLL_RESET;
4028 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4029 
4030 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4031 	tmp |= SPLL_SLEEP;
4032 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4033 
4034 	tmp = RREG32(SPLL_CNTL_MODE);
4035 	tmp &= ~SPLL_SW_DIR_CONTROL;
4036 	WREG32(SPLL_CNTL_MODE, tmp);
4037 }
4038 
4039 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4040 {
4041 	struct evergreen_mc_save save;
4042 	u32 tmp, i;
4043 
4044 	dev_info(rdev->dev, "GPU pci config reset\n");
4045 
4046 	/* disable dpm? */
4047 
4048 	/* disable cg/pg */
4049 	si_fini_pg(rdev);
4050 	si_fini_cg(rdev);
4051 
4052 	/* Disable CP parsing/prefetching */
4053 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4054 	/* dma0 */
4055 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4056 	tmp &= ~DMA_RB_ENABLE;
4057 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4058 	/* dma1 */
4059 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4060 	tmp &= ~DMA_RB_ENABLE;
4061 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4062 	/* XXX other engines? */
4063 
4064 	/* halt the rlc, disable cp internal ints */
4065 	si_rlc_stop(rdev);
4066 
4067 	udelay(50);
4068 
4069 	/* disable mem access */
4070 	evergreen_mc_stop(rdev, &save);
4071 	if (evergreen_mc_wait_for_idle(rdev)) {
4072 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4073 	}
4074 
4075 	/* set mclk/sclk to bypass */
4076 	si_set_clk_bypass_mode(rdev);
4077 	/* powerdown spll */
4078 	si_spll_powerdown(rdev);
4079 	/* disable BM */
4080 	pci_clear_master(rdev->pdev);
4081 	/* reset */
4082 	radeon_pci_config_reset(rdev);
4083 	/* wait for asic to come out of reset */
4084 	for (i = 0; i < rdev->usec_timeout; i++) {
4085 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4086 			break;
4087 		udelay(1);
4088 	}
4089 }
4090 
4091 int si_asic_reset(struct radeon_device *rdev, bool hard)
4092 {
4093 	u32 reset_mask;
4094 
4095 	if (hard) {
4096 		si_gpu_pci_config_reset(rdev);
4097 		return 0;
4098 	}
4099 
4100 	reset_mask = si_gpu_check_soft_reset(rdev);
4101 
4102 	if (reset_mask)
4103 		r600_set_bios_scratch_engine_hung(rdev, true);
4104 
4105 	/* try soft reset */
4106 	si_gpu_soft_reset(rdev, reset_mask);
4107 
4108 	reset_mask = si_gpu_check_soft_reset(rdev);
4109 
4110 	/* try pci config reset */
4111 	if (reset_mask && radeon_hard_reset)
4112 		si_gpu_pci_config_reset(rdev);
4113 
4114 	reset_mask = si_gpu_check_soft_reset(rdev);
4115 
4116 	if (!reset_mask)
4117 		r600_set_bios_scratch_engine_hung(rdev, false);
4118 
4119 	return 0;
4120 }
4121 
4122 /**
4123  * si_gfx_is_lockup - Check if the GFX engine is locked up
4124  *
4125  * @rdev: radeon_device pointer
4126  * @ring: radeon_ring structure holding ring information
4127  *
4128  * Check if the GFX engine is locked up.
4129  * Returns true if the engine appears to be locked up, false if not.
4130  */
4131 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4132 {
4133 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4134 
4135 	if (!(reset_mask & (RADEON_RESET_GFX |
4136 			    RADEON_RESET_COMPUTE |
4137 			    RADEON_RESET_CP))) {
4138 		radeon_ring_lockup_update(rdev, ring);
4139 		return false;
4140 	}
4141 	return radeon_ring_test_lockup(rdev, ring);
4142 }
4143 
4144 /* MC */
4145 static void si_mc_program(struct radeon_device *rdev)
4146 {
4147 	struct evergreen_mc_save save;
4148 	u32 tmp;
4149 	int i, j;
4150 
4151 	/* Initialize HDP */
4152 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4153 		WREG32((0x2c14 + j), 0x00000000);
4154 		WREG32((0x2c18 + j), 0x00000000);
4155 		WREG32((0x2c1c + j), 0x00000000);
4156 		WREG32((0x2c20 + j), 0x00000000);
4157 		WREG32((0x2c24 + j), 0x00000000);
4158 	}
4159 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4160 
4161 	evergreen_mc_stop(rdev, &save);
4162 	if (radeon_mc_wait_for_idle(rdev)) {
4163 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4164 	}
4165 	if (!ASIC_IS_NODCE(rdev))
4166 		/* Lock out access through the VGA aperture */
4167 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4168 	/* Update configuration */
4169 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4170 	       rdev->mc.vram_start >> 12);
4171 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4172 	       rdev->mc.vram_end >> 12);
4173 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4174 	       rdev->vram_scratch.gpu_addr >> 12);
4175 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4176 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4177 	WREG32(MC_VM_FB_LOCATION, tmp);
4178 	/* XXX double check these! */
4179 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4180 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4181 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4182 	WREG32(MC_VM_AGP_BASE, 0);
4183 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4184 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4185 	if (radeon_mc_wait_for_idle(rdev)) {
4186 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4187 	}
4188 	evergreen_mc_resume(rdev, &save);
4189 	if (!ASIC_IS_NODCE(rdev)) {
4190 		/* we need to own VRAM, so turn off the VGA renderer here
4191 		 * to stop it from overwriting our objects */
4192 		rv515_vga_render_disable(rdev);
4193 	}
4194 }
4195 
4196 void si_vram_gtt_location(struct radeon_device *rdev,
4197 			  struct radeon_mc *mc)
4198 {
4199 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4200 		/* leave room for at least 1024M GTT */
4201 		dev_warn(rdev->dev, "limiting VRAM\n");
4202 		mc->real_vram_size = 0xFFC0000000ULL;
4203 		mc->mc_vram_size = 0xFFC0000000ULL;
4204 	}
4205 	radeon_vram_location(rdev, &rdev->mc, 0);
4206 	rdev->mc.gtt_base_align = 0;
4207 	radeon_gtt_location(rdev, mc);
4208 }
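/* 0xFFC0000000 above is 2^40 - 2^30: assuming the 40-bit MC address space
 * of these parts, clamping VRAM to that value leaves exactly 1 GiB of
 * address space for the GTT aperture.
 */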
4209 
4210 static int si_mc_init(struct radeon_device *rdev)
4211 {
4212 	u32 tmp;
4213 	int chansize, numchan;
4214 
4215 	/* Get VRAM information */
4216 	rdev->mc.vram_is_ddr = true;
4217 	tmp = RREG32(MC_ARB_RAMCFG);
4218 	if (tmp & CHANSIZE_OVERRIDE) {
4219 		chansize = 16;
4220 	} else if (tmp & CHANSIZE_MASK) {
4221 		chansize = 64;
4222 	} else {
4223 		chansize = 32;
4224 	}
4225 	tmp = RREG32(MC_SHARED_CHMAP);
4226 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4227 	case 0:
4228 	default:
4229 		numchan = 1;
4230 		break;
4231 	case 1:
4232 		numchan = 2;
4233 		break;
4234 	case 2:
4235 		numchan = 4;
4236 		break;
4237 	case 3:
4238 		numchan = 8;
4239 		break;
4240 	case 4:
4241 		numchan = 3;
4242 		break;
4243 	case 5:
4244 		numchan = 6;
4245 		break;
4246 	case 6:
4247 		numchan = 10;
4248 		break;
4249 	case 7:
4250 		numchan = 12;
4251 		break;
4252 	case 8:
4253 		numchan = 16;
4254 		break;
4255 	}
4256 	rdev->mc.vram_width = numchan * chansize;
4257 	/* Could the aperture size report 0? */
4258 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4259 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4260 	/* size in MB on si */
4261 	tmp = RREG32(CONFIG_MEMSIZE);
4262 	/* some boards may have garbage in the upper 16 bits */
4263 	if (tmp & 0xffff0000) {
4264 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4265 		if (tmp & 0xffff)
4266 			tmp &= 0xffff;
4267 	}
4268 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4269 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4270 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4271 	si_vram_gtt_location(rdev, &rdev->mc);
4272 	radeon_update_bandwidth_info(rdev);
4273 
4274 	return 0;
4275 }
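/* For example, a Tahiti board reporting 12 channels of 32 bits each comes
 * out as vram_width = 384, and CONFIG_MEMSIZE = 3072 (the size in MB)
 * becomes mc_vram_size = 3072 * 1024 * 1024 bytes = 3 GiB.
 */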
4276 
4277 /*
4278  * GART
4279  */
4280 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4281 {
4282 	/* flush hdp cache */
4283 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4284 
4285 	/* bits 0-15 are VM contexts 0-15 */
4286 	WREG32(VM_INVALIDATE_REQUEST, 1);
4287 }
4288 
4289 static int si_pcie_gart_enable(struct radeon_device *rdev)
4290 {
4291 	int r, i;
4292 
4293 	if (rdev->gart.robj == NULL) {
4294 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4295 		return -EINVAL;
4296 	}
4297 	r = radeon_gart_table_vram_pin(rdev);
4298 	if (r)
4299 		return r;
4300 	/* Setup TLB control */
4301 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4302 	       (0xA << 7) |
4303 	       ENABLE_L1_TLB |
4304 	       ENABLE_L1_FRAGMENT_PROCESSING |
4305 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4306 	       ENABLE_ADVANCED_DRIVER_MODEL |
4307 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4308 	/* Setup L2 cache */
4309 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4310 	       ENABLE_L2_FRAGMENT_PROCESSING |
4311 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4312 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4313 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4314 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4315 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4316 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4317 	       BANK_SELECT(4) |
4318 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4319 	/* setup context0 */
4320 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4321 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4322 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4323 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4324 			(u32)(rdev->dummy_page.addr >> 12));
4325 	WREG32(VM_CONTEXT0_CNTL2, 0);
4326 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4327 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4328 
4329 	WREG32(0x15D4, 0);
4330 	WREG32(0x15D8, 0);
4331 	WREG32(0x15DC, 0);
4332 
4333 	/* empty context1-15 */
4334 	/* set vm size, must be a multiple of 4 */
4335 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4336 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4337 	/* Assign the pt base to something valid for now; the pts used for
4338 	 * the VMs are determined by the application and setup and assigned
4339 	 * on the fly in the vm part of radeon_gart.c
4340 	 */
4341 	for (i = 1; i < 16; i++) {
4342 		if (i < 8)
4343 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4344 			       rdev->vm_manager.saved_table_addr[i]);
4345 		else
4346 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4347 			       rdev->vm_manager.saved_table_addr[i]);
4348 	}
4349 
4350 	/* enable context1-15 */
4351 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4352 	       (u32)(rdev->dummy_page.addr >> 12));
4353 	WREG32(VM_CONTEXT1_CNTL2, 4);
4354 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4355 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4356 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4357 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4358 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4359 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4360 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4361 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4362 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4363 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4364 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4365 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4366 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4367 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4368 
4369 	si_pcie_gart_tlb_flush(rdev);
4370 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4371 		 (unsigned)(rdev->mc.gtt_size >> 20),
4372 		 (unsigned long long)rdev->gart.table_addr);
4373 	rdev->gart.ready = true;
4374 	return 0;
4375 }
4376 
4377 static void si_pcie_gart_disable(struct radeon_device *rdev)
4378 {
4379 	unsigned i;
4380 
4381 	for (i = 1; i < 16; ++i) {
4382 		uint32_t reg;
4383 		if (i < 8)
4384 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4385 		else
4386 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4387 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4388 	}
4389 
4390 	/* Disable all tables */
4391 	WREG32(VM_CONTEXT0_CNTL, 0);
4392 	WREG32(VM_CONTEXT1_CNTL, 0);
4393 	/* Setup TLB control */
4394 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4395 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4396 	/* Setup L2 cache */
4397 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4398 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4399 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4400 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4401 	WREG32(VM_L2_CNTL2, 0);
4402 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4403 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4404 	radeon_gart_table_vram_unpin(rdev);
4405 }
4406 
4407 static void si_pcie_gart_fini(struct radeon_device *rdev)
4408 {
4409 	si_pcie_gart_disable(rdev);
4410 	radeon_gart_table_vram_free(rdev);
4411 	radeon_gart_fini(rdev);
4412 }
4413 
4414 /* vm parser */
4415 static bool si_vm_reg_valid(u32 reg)
4416 {
4417 	/* context regs are fine */
4418 	if (reg >= 0x28000)
4419 		return true;
4420 
4421 	/* shader regs are also fine */
4422 	if (reg >= 0xB000 && reg < 0xC000)
4423 		return true;
4424 
4425 	/* check config regs */
4426 	switch (reg) {
4427 	case GRBM_GFX_INDEX:
4428 	case CP_STRMOUT_CNTL:
4429 	case VGT_VTX_VECT_EJECT_REG:
4430 	case VGT_CACHE_INVALIDATION:
4431 	case VGT_ESGS_RING_SIZE:
4432 	case VGT_GSVS_RING_SIZE:
4433 	case VGT_GS_VERTEX_REUSE:
4434 	case VGT_PRIMITIVE_TYPE:
4435 	case VGT_INDEX_TYPE:
4436 	case VGT_NUM_INDICES:
4437 	case VGT_NUM_INSTANCES:
4438 	case VGT_TF_RING_SIZE:
4439 	case VGT_HS_OFFCHIP_PARAM:
4440 	case VGT_TF_MEMORY_BASE:
4441 	case PA_CL_ENHANCE:
4442 	case PA_SU_LINE_STIPPLE_VALUE:
4443 	case PA_SC_LINE_STIPPLE_STATE:
4444 	case PA_SC_ENHANCE:
4445 	case SQC_CACHES:
4446 	case SPI_STATIC_THREAD_MGMT_1:
4447 	case SPI_STATIC_THREAD_MGMT_2:
4448 	case SPI_STATIC_THREAD_MGMT_3:
4449 	case SPI_PS_MAX_WAVE_ID:
4450 	case SPI_CONFIG_CNTL:
4451 	case SPI_CONFIG_CNTL_1:
4452 	case TA_CNTL_AUX:
4453 	case TA_CS_BC_BASE_ADDR:
4454 		return true;
4455 	default:
4456 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4457 		return false;
4458 	}
4459 }
4460 
4461 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4462 				  u32 *ib, struct radeon_cs_packet *pkt)
4463 {
4464 	switch (pkt->opcode) {
4465 	case PACKET3_NOP:
4466 	case PACKET3_SET_BASE:
4467 	case PACKET3_SET_CE_DE_COUNTERS:
4468 	case PACKET3_LOAD_CONST_RAM:
4469 	case PACKET3_WRITE_CONST_RAM:
4470 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4471 	case PACKET3_DUMP_CONST_RAM:
4472 	case PACKET3_INCREMENT_CE_COUNTER:
4473 	case PACKET3_WAIT_ON_DE_COUNTER:
4474 	case PACKET3_CE_WRITE:
4475 		break;
4476 	default:
4477 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4478 		return -EINVAL;
4479 	}
4480 	return 0;
4481 }
4482 
4483 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4484 {
4485 	u32 start_reg, reg, i;
4486 	u32 command = ib[idx + 4];
4487 	u32 info = ib[idx + 1];
4488 	u32 idx_value = ib[idx];
4489 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4490 		/* src address space is register */
4491 		if (((info & 0x60000000) >> 29) == 0) {
4492 			start_reg = idx_value << 2;
4493 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4494 				reg = start_reg;
4495 				if (!si_vm_reg_valid(reg)) {
4496 					DRM_ERROR("CP DMA Bad SRC register\n");
4497 					return -EINVAL;
4498 				}
4499 			} else {
4500 				for (i = 0; i < (command & 0x1fffff); i++) {
4501 					reg = start_reg + (4 * i);
4502 					if (!si_vm_reg_valid(reg)) {
4503 						DRM_ERROR("CP DMA Bad SRC register\n");
4504 						return -EINVAL;
4505 					}
4506 				}
4507 			}
4508 		}
4509 	}
4510 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4511 		/* dst address space is register */
4512 		if (((info & 0x00300000) >> 20) == 0) {
4513 			start_reg = ib[idx + 2];
4514 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4515 				reg = start_reg;
4516 				if (!si_vm_reg_valid(reg)) {
4517 					DRM_ERROR("CP DMA Bad DST register\n");
4518 					return -EINVAL;
4519 				}
4520 			} else {
4521 				for (i = 0; i < (command & 0x1fffff); i++) {
4522 					reg = start_reg + (4 * i);
4523 					if (!si_vm_reg_valid(reg)) {
4524 						DRM_ERROR("CP DMA Bad DST register\n");
4525 						return -EINVAL;
4526 					}
4527 				}
4528 			}
4529 		}
4530 	}
4531 	return 0;
4532 }
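/* Sketch of what the checker above enforces: with DAS set, DAIC clear and
 * (command & 0x1fffff) == 4, the loop validates the four destination
 * registers start_reg, start_reg + 4, start_reg + 8 and start_reg + 12;
 * with DAIC set, only start_reg itself must be whitelisted since the
 * destination address is not incremented.
 */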
4533 
4534 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4535 				   u32 *ib, struct radeon_cs_packet *pkt)
4536 {
4537 	int r;
4538 	u32 idx = pkt->idx + 1;
4539 	u32 idx_value = ib[idx];
4540 	u32 start_reg, end_reg, reg, i;
4541 
4542 	switch (pkt->opcode) {
4543 	case PACKET3_NOP:
4544 	case PACKET3_SET_BASE:
4545 	case PACKET3_CLEAR_STATE:
4546 	case PACKET3_INDEX_BUFFER_SIZE:
4547 	case PACKET3_DISPATCH_DIRECT:
4548 	case PACKET3_DISPATCH_INDIRECT:
4549 	case PACKET3_ALLOC_GDS:
4550 	case PACKET3_WRITE_GDS_RAM:
4551 	case PACKET3_ATOMIC_GDS:
4552 	case PACKET3_ATOMIC:
4553 	case PACKET3_OCCLUSION_QUERY:
4554 	case PACKET3_SET_PREDICATION:
4555 	case PACKET3_COND_EXEC:
4556 	case PACKET3_PRED_EXEC:
4557 	case PACKET3_DRAW_INDIRECT:
4558 	case PACKET3_DRAW_INDEX_INDIRECT:
4559 	case PACKET3_INDEX_BASE:
4560 	case PACKET3_DRAW_INDEX_2:
4561 	case PACKET3_CONTEXT_CONTROL:
4562 	case PACKET3_INDEX_TYPE:
4563 	case PACKET3_DRAW_INDIRECT_MULTI:
4564 	case PACKET3_DRAW_INDEX_AUTO:
4565 	case PACKET3_DRAW_INDEX_IMMD:
4566 	case PACKET3_NUM_INSTANCES:
4567 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4568 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4569 	case PACKET3_DRAW_INDEX_OFFSET_2:
4570 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4571 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4572 	case PACKET3_MPEG_INDEX:
4573 	case PACKET3_WAIT_REG_MEM:
4574 	case PACKET3_MEM_WRITE:
4575 	case PACKET3_PFP_SYNC_ME:
4576 	case PACKET3_SURFACE_SYNC:
4577 	case PACKET3_EVENT_WRITE:
4578 	case PACKET3_EVENT_WRITE_EOP:
4579 	case PACKET3_EVENT_WRITE_EOS:
4580 	case PACKET3_SET_CONTEXT_REG:
4581 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4582 	case PACKET3_SET_SH_REG:
4583 	case PACKET3_SET_SH_REG_OFFSET:
4584 	case PACKET3_INCREMENT_DE_COUNTER:
4585 	case PACKET3_WAIT_ON_CE_COUNTER:
4586 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4587 	case PACKET3_ME_WRITE:
4588 		break;
4589 	case PACKET3_COPY_DATA:
4590 		if ((idx_value & 0xf00) == 0) {
4591 			reg = ib[idx + 3] * 4;
4592 			if (!si_vm_reg_valid(reg))
4593 				return -EINVAL;
4594 		}
4595 		break;
4596 	case PACKET3_WRITE_DATA:
4597 		if ((idx_value & 0xf00) == 0) {
4598 			start_reg = ib[idx + 1] * 4;
4599 			if (idx_value & 0x10000) {
4600 				if (!si_vm_reg_valid(start_reg))
4601 					return -EINVAL;
4602 			} else {
4603 				for (i = 0; i < (pkt->count - 2); i++) {
4604 					reg = start_reg + (4 * i);
4605 					if (!si_vm_reg_valid(reg))
4606 						return -EINVAL;
4607 				}
4608 			}
4609 		}
4610 		break;
4611 	case PACKET3_COND_WRITE:
4612 		if (idx_value & 0x100) {
4613 			reg = ib[idx + 5] * 4;
4614 			if (!si_vm_reg_valid(reg))
4615 				return -EINVAL;
4616 		}
4617 		break;
4618 	case PACKET3_COPY_DW:
4619 		if (idx_value & 0x2) {
4620 			reg = ib[idx + 3] * 4;
4621 			if (!si_vm_reg_valid(reg))
4622 				return -EINVAL;
4623 		}
4624 		break;
4625 	case PACKET3_SET_CONFIG_REG:
4626 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4627 		end_reg = 4 * pkt->count + start_reg - 4;
4628 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4629 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4630 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4631 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4632 			return -EINVAL;
4633 		}
4634 		for (i = 0; i < pkt->count; i++) {
4635 			reg = start_reg + (4 * i);
4636 			if (!si_vm_reg_valid(reg))
4637 				return -EINVAL;
4638 		}
4639 		break;
4640 	case PACKET3_CP_DMA:
4641 		r = si_vm_packet3_cp_dma_check(ib, idx);
4642 		if (r)
4643 			return r;
4644 		break;
4645 	default:
4646 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4647 		return -EINVAL;
4648 	}
4649 	return 0;
4650 }
4651 
4652 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4653 				       u32 *ib, struct radeon_cs_packet *pkt)
4654 {
4655 	int r;
4656 	u32 idx = pkt->idx + 1;
4657 	u32 idx_value = ib[idx];
4658 	u32 start_reg, reg, i;
4659 
4660 	switch (pkt->opcode) {
4661 	case PACKET3_NOP:
4662 	case PACKET3_SET_BASE:
4663 	case PACKET3_CLEAR_STATE:
4664 	case PACKET3_DISPATCH_DIRECT:
4665 	case PACKET3_DISPATCH_INDIRECT:
4666 	case PACKET3_ALLOC_GDS:
4667 	case PACKET3_WRITE_GDS_RAM:
4668 	case PACKET3_ATOMIC_GDS:
4669 	case PACKET3_ATOMIC:
4670 	case PACKET3_OCCLUSION_QUERY:
4671 	case PACKET3_SET_PREDICATION:
4672 	case PACKET3_COND_EXEC:
4673 	case PACKET3_PRED_EXEC:
4674 	case PACKET3_CONTEXT_CONTROL:
4675 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4676 	case PACKET3_WAIT_REG_MEM:
4677 	case PACKET3_MEM_WRITE:
4678 	case PACKET3_PFP_SYNC_ME:
4679 	case PACKET3_SURFACE_SYNC:
4680 	case PACKET3_EVENT_WRITE:
4681 	case PACKET3_EVENT_WRITE_EOP:
4682 	case PACKET3_EVENT_WRITE_EOS:
4683 	case PACKET3_SET_CONTEXT_REG:
4684 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4685 	case PACKET3_SET_SH_REG:
4686 	case PACKET3_SET_SH_REG_OFFSET:
4687 	case PACKET3_INCREMENT_DE_COUNTER:
4688 	case PACKET3_WAIT_ON_CE_COUNTER:
4689 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4690 	case PACKET3_ME_WRITE:
4691 		break;
4692 	case PACKET3_COPY_DATA:
4693 		if ((idx_value & 0xf00) == 0) {
4694 			reg = ib[idx + 3] * 4;
4695 			if (!si_vm_reg_valid(reg))
4696 				return -EINVAL;
4697 		}
4698 		break;
4699 	case PACKET3_WRITE_DATA:
4700 		if ((idx_value & 0xf00) == 0) {
4701 			start_reg = ib[idx + 1] * 4;
4702 			if (idx_value & 0x10000) {
4703 				if (!si_vm_reg_valid(start_reg))
4704 					return -EINVAL;
4705 			} else {
4706 				for (i = 0; i < (pkt->count - 2); i++) {
4707 					reg = start_reg + (4 * i);
4708 					if (!si_vm_reg_valid(reg))
4709 						return -EINVAL;
4710 				}
4711 			}
4712 		}
4713 		break;
4714 	case PACKET3_COND_WRITE:
4715 		if (idx_value & 0x100) {
4716 			reg = ib[idx + 5] * 4;
4717 			if (!si_vm_reg_valid(reg))
4718 				return -EINVAL;
4719 		}
4720 		break;
4721 	case PACKET3_COPY_DW:
4722 		if (idx_value & 0x2) {
4723 			reg = ib[idx + 3] * 4;
4724 			if (!si_vm_reg_valid(reg))
4725 				return -EINVAL;
4726 		}
4727 		break;
4728 	case PACKET3_CP_DMA:
4729 		r = si_vm_packet3_cp_dma_check(ib, idx);
4730 		if (r)
4731 			return r;
4732 		break;
4733 	default:
4734 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4735 		return -EINVAL;
4736 	}
4737 	return 0;
4738 }
4739 
4740 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4741 {
4742 	int ret = 0;
4743 	u32 idx = 0, i;
4744 	struct radeon_cs_packet pkt;
4745 
4746 	do {
4747 		pkt.idx = idx;
4748 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4749 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4750 		pkt.one_reg_wr = 0;
4751 		switch (pkt.type) {
4752 		case RADEON_PACKET_TYPE0:
4753 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4754 			ret = -EINVAL;
4755 			break;
4756 		case RADEON_PACKET_TYPE2:
4757 			idx += 1;
4758 			break;
4759 		case RADEON_PACKET_TYPE3:
4760 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4761 			if (ib->is_const_ib)
4762 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4763 			else {
4764 				switch (ib->ring) {
4765 				case RADEON_RING_TYPE_GFX_INDEX:
4766 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4767 					break;
4768 				case CAYMAN_RING_TYPE_CP1_INDEX:
4769 				case CAYMAN_RING_TYPE_CP2_INDEX:
4770 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4771 					break;
4772 				default:
4773 					dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4774 					ret = -EINVAL;
4775 					break;
4776 				}
4777 			}
4778 			idx += pkt.count + 2;
4779 			break;
4780 		default:
4781 			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4782 			ret = -EINVAL;
4783 			break;
4784 		}
4785 		if (ret) {
4786 			for (i = 0; i < ib->length_dw; i++) {
4787 				if (i == idx)
4788 					printk("\t0x%08x <---\n", ib->ptr[i]);
4789 				else
4790 					printk("\t0x%08x\n", ib->ptr[i]);
4791 			}
4792 			break;
4793 		}
4794 	} while (idx < ib->length_dw);
4795 
4796 	return ret;
4797 }
4798 
4799 /*
4800  * vm
4801  */
4802 int si_vm_init(struct radeon_device *rdev)
4803 {
4804 	/* number of VMs */
4805 	rdev->vm_manager.nvm = 16;
4806 	/* base offset of vram pages */
4807 	rdev->vm_manager.vram_base_offset = 0;
4808 
4809 	return 0;
4810 }
4811 
4812 void si_vm_fini(struct radeon_device *rdev)
4813 {
4814 }
4815 
4816 /**
4817  * si_vm_decode_fault - print human readable fault info
4818  *
4819  * @rdev: radeon_device pointer
4820  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4821  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4822  *
4823  * Print human readable fault information (SI).
4824  */
4825 static void si_vm_decode_fault(struct radeon_device *rdev,
4826 			       u32 status, u32 addr)
4827 {
4828 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4829 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4830 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4831 	char *block;
4832 
4833 	if (rdev->family == CHIP_TAHITI) {
4834 		switch (mc_id) {
4835 		case 160:
4836 		case 144:
4837 		case 96:
4838 		case 80:
4839 		case 224:
4840 		case 208:
4841 		case 32:
4842 		case 16:
4843 			block = "CB";
4844 			break;
4845 		case 161:
4846 		case 145:
4847 		case 97:
4848 		case 81:
4849 		case 225:
4850 		case 209:
4851 		case 33:
4852 		case 17:
4853 			block = "CB_FMASK";
4854 			break;
4855 		case 162:
4856 		case 146:
4857 		case 98:
4858 		case 82:
4859 		case 226:
4860 		case 210:
4861 		case 34:
4862 		case 18:
4863 			block = "CB_CMASK";
4864 			break;
4865 		case 163:
4866 		case 147:
4867 		case 99:
4868 		case 83:
4869 		case 227:
4870 		case 211:
4871 		case 35:
4872 		case 19:
4873 			block = "CB_IMMED";
4874 			break;
4875 		case 164:
4876 		case 148:
4877 		case 100:
4878 		case 84:
4879 		case 228:
4880 		case 212:
4881 		case 36:
4882 		case 20:
4883 			block = "DB";
4884 			break;
4885 		case 165:
4886 		case 149:
4887 		case 101:
4888 		case 85:
4889 		case 229:
4890 		case 213:
4891 		case 37:
4892 		case 21:
4893 			block = "DB_HTILE";
4894 			break;
4895 		case 167:
4896 		case 151:
4897 		case 103:
4898 		case 87:
4899 		case 231:
4900 		case 215:
4901 		case 39:
4902 		case 23:
4903 			block = "DB_STEN";
4904 			break;
4905 		case 72:
4906 		case 68:
4907 		case 64:
4908 		case 8:
4909 		case 4:
4910 		case 0:
4911 		case 136:
4912 		case 132:
4913 		case 128:
4914 		case 200:
4915 		case 196:
4916 		case 192:
4917 			block = "TC";
4918 			break;
4919 		case 112:
4920 		case 48:
4921 			block = "CP";
4922 			break;
4923 		case 49:
4924 		case 177:
4925 		case 50:
4926 		case 178:
4927 			block = "SH";
4928 			break;
4929 		case 53:
4930 		case 190:
4931 			block = "VGT";
4932 			break;
4933 		case 117:
4934 			block = "IH";
4935 			break;
4936 		case 51:
4937 		case 115:
4938 			block = "RLC";
4939 			break;
4940 		case 119:
4941 		case 183:
4942 			block = "DMA0";
4943 			break;
4944 		case 61:
4945 			block = "DMA1";
4946 			break;
4947 		case 248:
4948 		case 120:
4949 			block = "HDP";
4950 			break;
4951 		default:
4952 			block = "unknown";
4953 			break;
4954 		}
4955 	} else {
4956 		switch (mc_id) {
4957 		case 32:
4958 		case 16:
4959 		case 96:
4960 		case 80:
4961 		case 160:
4962 		case 144:
4963 		case 224:
4964 		case 208:
4965 			block = "CB";
4966 			break;
4967 		case 33:
4968 		case 17:
4969 		case 97:
4970 		case 81:
4971 		case 161:
4972 		case 145:
4973 		case 225:
4974 		case 209:
4975 			block = "CB_FMASK";
4976 			break;
4977 		case 34:
4978 		case 18:
4979 		case 98:
4980 		case 82:
4981 		case 162:
4982 		case 146:
4983 		case 226:
4984 		case 210:
4985 			block = "CB_CMASK";
4986 			break;
4987 		case 35:
4988 		case 19:
4989 		case 99:
4990 		case 83:
4991 		case 163:
4992 		case 147:
4993 		case 227:
4994 		case 211:
4995 			block = "CB_IMMED";
4996 			break;
4997 		case 36:
4998 		case 20:
4999 		case 100:
5000 		case 84:
5001 		case 164:
5002 		case 148:
5003 		case 228:
5004 		case 212:
5005 			block = "DB";
5006 			break;
5007 		case 37:
5008 		case 21:
5009 		case 101:
5010 		case 85:
5011 		case 165:
5012 		case 149:
5013 		case 229:
5014 		case 213:
5015 			block = "DB_HTILE";
5016 			break;
5017 		case 39:
5018 		case 23:
5019 		case 103:
5020 		case 87:
5021 		case 167:
5022 		case 151:
5023 		case 231:
5024 		case 215:
5025 			block = "DB_STEN";
5026 			break;
5027 		case 72:
5028 		case 68:
5029 		case 8:
5030 		case 4:
5031 		case 136:
5032 		case 132:
5033 		case 200:
5034 		case 196:
5035 			block = "TC";
5036 			break;
5037 		case 112:
5038 		case 48:
5039 			block = "CP";
5040 			break;
5041 		case 49:
5042 		case 177:
5043 		case 50:
5044 		case 178:
5045 			block = "SH";
5046 			break;
5047 		case 53:
5048 			block = "VGT";
5049 			break;
5050 		case 117:
5051 			block = "IH";
5052 			break;
5053 		case 51:
5054 		case 115:
5055 			block = "RLC";
5056 			break;
5057 		case 119:
5058 		case 183:
5059 			block = "DMA0";
5060 			break;
5061 		case 61:
5062 			block = "DMA1";
5063 			break;
5064 		case 248:
5065 		case 120:
5066 			block = "HDP";
5067 			break;
5068 		default:
5069 			block = "unknown";
5070 			break;
5071 		}
5072 	}
5073 
5074 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5075 	       protections, vmid, addr,
5076 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5077 	       block, mc_id);
5078 }
5079 
5080 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5081 		 unsigned vm_id, uint64_t pd_addr)
5082 {
5083 	/* write new base address */
5084 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5085 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5086 				 WRITE_DATA_DST_SEL(0)));
5087 
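	/* contexts 0-7 and 8-15 keep their page table base address registers
	 * in two separate banks, hence the split below */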
5088 	if (vm_id < 8) {
5089 		radeon_ring_write(ring,
5090 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5091 	} else {
5092 		radeon_ring_write(ring,
5093 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5094 	}
5095 	radeon_ring_write(ring, 0);
5096 	radeon_ring_write(ring, pd_addr >> 12);
5097 
5098 	/* flush hdp cache */
5099 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5100 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5101 				 WRITE_DATA_DST_SEL(0)));
5102 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5103 	radeon_ring_write(ring, 0);
5104 	radeon_ring_write(ring, 0x1);
5105 
5106 	/* bits 0-15 are the VM contexts0-15 */
5107 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5108 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5109 				 WRITE_DATA_DST_SEL(0)));
5110 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5111 	radeon_ring_write(ring, 0);
5112 	radeon_ring_write(ring, 1 << vm_id);
5113 
5114 	/* wait for the invalidate to complete */
5115 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5116 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5117 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5118 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5119 	radeon_ring_write(ring, 0);
5120 	radeon_ring_write(ring, 0); /* ref */
5121 	radeon_ring_write(ring, 0); /* mask */
5122 	radeon_ring_write(ring, 0x20); /* poll interval */
5123 
5124 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5125 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5126 	radeon_ring_write(ring, 0x0);
5127 }
5128 
5129 /*
5130  *  Power and clock gating
5131  */
5132 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5133 {
5134 	int i;
5135 
5136 	for (i = 0; i < rdev->usec_timeout; i++) {
5137 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5138 			break;
5139 		udelay(1);
5140 	}
5141 
5142 	for (i = 0; i < rdev->usec_timeout; i++) {
5143 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5144 			break;
5145 		udelay(1);
5146 	}
5147 }
5148 
5149 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5150 					 bool enable)
5151 {
5152 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5153 	u32 mask;
5154 	int i;
5155 
5156 	if (enable)
5157 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5158 	else
5159 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5160 	WREG32(CP_INT_CNTL_RING0, tmp);
5161 
5162 	if (!enable) {
5163 		/* read a gfx register */
5164 		tmp = RREG32(DB_DEPTH_INFO);
5165 
5166 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5167 		for (i = 0; i < rdev->usec_timeout; i++) {
5168 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5169 				break;
5170 			udelay(1);
5171 		}
5172 	}
5173 }
5174 
5175 static void si_set_uvd_dcm(struct radeon_device *rdev,
5176 			   bool sw_mode)
5177 {
5178 	u32 tmp, tmp2;
5179 
5180 	tmp = RREG32(UVD_CGC_CTRL);
5181 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5182 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5183 
5184 	if (sw_mode) {
5185 		tmp &= ~0x7ffff800;
5186 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5187 	} else {
5188 		tmp |= 0x7ffff800;
5189 		tmp2 = 0;
5190 	}
5191 
5192 	WREG32(UVD_CGC_CTRL, tmp);
5193 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5194 }
5195 
5196 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5197 {
5198 	bool hw_mode = true;
5199 
5200 	if (hw_mode) {
5201 		si_set_uvd_dcm(rdev, false);
5202 	} else {
5203 		u32 tmp = RREG32(UVD_CGC_CTRL);
5204 		tmp &= ~DCM;
5205 		WREG32(UVD_CGC_CTRL, tmp);
5206 	}
5207 }
5208 
5209 static u32 si_halt_rlc(struct radeon_device *rdev)
5210 {
5211 	u32 data, orig;
5212 
5213 	orig = data = RREG32(RLC_CNTL);
5214 
5215 	if (data & RLC_ENABLE) {
5216 		data &= ~RLC_ENABLE;
5217 		WREG32(RLC_CNTL, data);
5218 
5219 		si_wait_for_rlc_serdes(rdev);
5220 	}
5221 
5222 	return orig;
5223 }
5224 
5225 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5226 {
5227 	u32 tmp;
5228 
5229 	tmp = RREG32(RLC_CNTL);
5230 	if (tmp != rlc)
5231 		WREG32(RLC_CNTL, rlc);
5232 }
5233 
5234 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5235 {
5236 	u32 data, orig;
5237 
5238 	orig = data = RREG32(DMA_PG);
5239 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5240 		data |= PG_CNTL_ENABLE;
5241 	else
5242 		data &= ~PG_CNTL_ENABLE;
5243 	if (orig != data)
5244 		WREG32(DMA_PG, data);
5245 }
5246 
5247 static void si_init_dma_pg(struct radeon_device *rdev)
5248 {
5249 	u32 tmp;
5250 
5251 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5252 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5253 
5254 	for (tmp = 0; tmp < 5; tmp++)
5255 		WREG32(DMA_PGFSM_WRITE, 0);
5256 }
5257 
5258 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5259 			       bool enable)
5260 {
5261 	u32 tmp;
5262 
5263 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5264 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5265 		WREG32(RLC_TTOP_D, tmp);
5266 
5267 		tmp = RREG32(RLC_PG_CNTL);
5268 		tmp |= GFX_PG_ENABLE;
5269 		WREG32(RLC_PG_CNTL, tmp);
5270 
5271 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5272 		tmp |= AUTO_PG_EN;
5273 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5274 	} else {
5275 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5276 		tmp &= ~AUTO_PG_EN;
5277 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5278 
5279 		tmp = RREG32(DB_RENDER_CONTROL);
5280 	}
5281 }
5282 
5283 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5284 {
5285 	u32 tmp;
5286 
5287 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5288 
5289 	tmp = RREG32(RLC_PG_CNTL);
5290 	tmp |= GFX_PG_SRC;
5291 	WREG32(RLC_PG_CNTL, tmp);
5292 
5293 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5294 
5295 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5296 
5297 	tmp &= ~GRBM_REG_SGIT_MASK;
5298 	tmp |= GRBM_REG_SGIT(0x700);
5299 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5300 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5301 }
5302 
5303 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5304 {
5305 	u32 mask = 0, tmp, tmp1;
5306 	int i;
5307 
5308 	si_select_se_sh(rdev, se, sh);
5309 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5310 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5311 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5312 
5313 	tmp &= 0xffff0000;
5314 
5315 	tmp |= tmp1;
5316 	tmp >>= 16;
5317 
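	/* build a mask of the low max_cu_per_sh bits, i.e. (1 << n) - 1 */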
5318 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5319 		mask <<= 1;
5320 		mask |= 1;
5321 	}
5322 
5323 	return (~tmp) & mask;
5324 }
5325 
5326 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5327 {
5328 	u32 i, j, k, active_cu_number = 0;
5329 	u32 mask, counter, cu_bitmap;
5330 	u32 tmp = 0;
5331 
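	/* for each SH, keep at most two active CUs in the always-on bitmap;
	 * the remaining CUs stay eligible for power gating */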
5332 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5333 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5334 			mask = 1;
5335 			cu_bitmap = 0;
5336 			counter  = 0;
5337 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5338 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5339 					if (counter < 2)
5340 						cu_bitmap |= mask;
5341 					counter++;
5342 				}
5343 				mask <<= 1;
5344 			}
5345 
5346 			active_cu_number += counter;
5347 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5348 		}
5349 	}
5350 
5351 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5352 
5353 	tmp = RREG32(RLC_MAX_PG_CU);
5354 	tmp &= ~MAX_PU_CU_MASK;
5355 	tmp |= MAX_PU_CU(active_cu_number);
5356 	WREG32(RLC_MAX_PG_CU, tmp);
5357 }
5358 
5359 static void si_enable_cgcg(struct radeon_device *rdev,
5360 			   bool enable)
5361 {
5362 	u32 data, orig, tmp;
5363 
5364 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5365 
5366 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5367 		si_enable_gui_idle_interrupt(rdev, true);
5368 
5369 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5370 
5371 		tmp = si_halt_rlc(rdev);
5372 
5373 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5374 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5375 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5376 
5377 		si_wait_for_rlc_serdes(rdev);
5378 
5379 		si_update_rlc(rdev, tmp);
5380 
5381 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5382 
5383 		data |= CGCG_EN | CGLS_EN;
5384 	} else {
5385 		si_enable_gui_idle_interrupt(rdev, false);
5386 
5387 		RREG32(CB_CGTT_SCLK_CTRL);
5388 		RREG32(CB_CGTT_SCLK_CTRL);
5389 		RREG32(CB_CGTT_SCLK_CTRL);
5390 		RREG32(CB_CGTT_SCLK_CTRL);
5391 
5392 		data &= ~(CGCG_EN | CGLS_EN);
5393 	}
5394 
5395 	if (orig != data)
5396 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5397 }
5398 
5399 static void si_enable_mgcg(struct radeon_device *rdev,
5400 			   bool enable)
5401 {
5402 	u32 data, orig, tmp = 0;
5403 
5404 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5405 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5406 		data = 0x96940200;
5407 		if (orig != data)
5408 			WREG32(CGTS_SM_CTRL_REG, data);
5409 
5410 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5411 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5412 			data |= CP_MEM_LS_EN;
5413 			if (orig != data)
5414 				WREG32(CP_MEM_SLP_CNTL, data);
5415 		}
5416 
5417 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5418 		data &= 0xffffffc0;
5419 		if (orig != data)
5420 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5421 
5422 		tmp = si_halt_rlc(rdev);
5423 
5424 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5425 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5426 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5427 
5428 		si_update_rlc(rdev, tmp);
5429 	} else {
5430 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5431 		data |= 0x00000003;
5432 		if (orig != data)
5433 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5434 
5435 		data = RREG32(CP_MEM_SLP_CNTL);
5436 		if (data & CP_MEM_LS_EN) {
5437 			data &= ~CP_MEM_LS_EN;
5438 			WREG32(CP_MEM_SLP_CNTL, data);
5439 		}
5440 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5441 		data |= LS_OVERRIDE | OVERRIDE;
5442 		if (orig != data)
5443 			WREG32(CGTS_SM_CTRL_REG, data);
5444 
5445 		tmp = si_halt_rlc(rdev);
5446 
5447 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5448 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5449 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5450 
5451 		si_update_rlc(rdev, tmp);
5452 	}
5453 }
5454 
5455 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5456 			       bool enable)
5457 {
5458 	u32 orig, data, tmp;
5459 
5460 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5461 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5462 		tmp |= 0x3fff;
5463 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5464 
5465 		orig = data = RREG32(UVD_CGC_CTRL);
5466 		data |= DCM;
5467 		if (orig != data)
5468 			WREG32(UVD_CGC_CTRL, data);
5469 
5470 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5471 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5472 	} else {
5473 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5474 		tmp &= ~0x3fff;
5475 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5476 
5477 		orig = data = RREG32(UVD_CGC_CTRL);
5478 		data &= ~DCM;
5479 		if (orig != data)
5480 			WREG32(UVD_CGC_CTRL, data);
5481 
5482 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5483 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5484 	}
5485 }
5486 
5487 static const u32 mc_cg_registers[] =
5488 {
5489 	MC_HUB_MISC_HUB_CG,
5490 	MC_HUB_MISC_SIP_CG,
5491 	MC_HUB_MISC_VM_CG,
5492 	MC_XPB_CLK_GAT,
5493 	ATC_MISC_CG,
5494 	MC_CITF_MISC_WR_CG,
5495 	MC_CITF_MISC_RD_CG,
5496 	MC_CITF_MISC_VM_CG,
5497 	VM_L2_CG,
5498 };
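/* all of the registers above share the MC_LS_ENABLE/MC_CG_ENABLE bit layout,
 * so the enable helpers below can toggle them in a single loop */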
5499 
5500 static void si_enable_mc_ls(struct radeon_device *rdev,
5501 			    bool enable)
5502 {
5503 	int i;
5504 	u32 orig, data;
5505 
5506 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5507 		orig = data = RREG32(mc_cg_registers[i]);
5508 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5509 			data |= MC_LS_ENABLE;
5510 		else
5511 			data &= ~MC_LS_ENABLE;
5512 		if (data != orig)
5513 			WREG32(mc_cg_registers[i], data);
5514 	}
5515 }
5516 
5517 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5518 			       bool enable)
5519 {
5520 	int i;
5521 	u32 orig, data;
5522 
5523 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5524 		orig = data = RREG32(mc_cg_registers[i]);
5525 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5526 			data |= MC_CG_ENABLE;
5527 		else
5528 			data &= ~MC_CG_ENABLE;
5529 		if (data != orig)
5530 			WREG32(mc_cg_registers[i], data);
5531 	}
5532 }
5533 
5534 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5535 			       bool enable)
5536 {
5537 	u32 orig, data, offset;
5538 	int i;
5539 
5540 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5541 		for (i = 0; i < 2; i++) {
5542 			if (i == 0)
5543 				offset = DMA0_REGISTER_OFFSET;
5544 			else
5545 				offset = DMA1_REGISTER_OFFSET;
5546 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5547 			data &= ~MEM_POWER_OVERRIDE;
5548 			if (data != orig)
5549 				WREG32(DMA_POWER_CNTL + offset, data);
5550 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5551 		}
5552 	} else {
5553 		for (i = 0; i < 2; i++) {
5554 			if (i == 0)
5555 				offset = DMA0_REGISTER_OFFSET;
5556 			else
5557 				offset = DMA1_REGISTER_OFFSET;
5558 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5559 			data |= MEM_POWER_OVERRIDE;
5560 			if (data != orig)
5561 				WREG32(DMA_POWER_CNTL + offset, data);
5562 
5563 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5564 			data = 0xff000000;
5565 			if (data != orig)
5566 				WREG32(DMA_CLK_CTRL + offset, data);
5567 		}
5568 	}
5569 }
5570 
5571 static void si_enable_bif_mgls(struct radeon_device *rdev,
5572 			       bool enable)
5573 {
5574 	u32 orig, data;
5575 
5576 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5577 
5578 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5579 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5580 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5581 	else
5582 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5583 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5584 
5585 	if (orig != data)
5586 		WREG32_PCIE(PCIE_CNTL2, data);
5587 }
5588 
5589 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5590 			       bool enable)
5591 {
5592 	u32 orig, data;
5593 
5594 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5595 
5596 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5597 		data &= ~CLOCK_GATING_DIS;
5598 	else
5599 		data |= CLOCK_GATING_DIS;
5600 
5601 	if (orig != data)
5602 		WREG32(HDP_HOST_PATH_CNTL, data);
5603 }
5604 
5605 static void si_enable_hdp_ls(struct radeon_device *rdev,
5606 			     bool enable)
5607 {
5608 	u32 orig, data;
5609 
5610 	orig = data = RREG32(HDP_MEM_POWER_LS);
5611 
5612 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5613 		data |= HDP_LS_ENABLE;
5614 	else
5615 		data &= ~HDP_LS_ENABLE;
5616 
5617 	if (orig != data)
5618 		WREG32(HDP_MEM_POWER_LS, data);
5619 }
5620 
5621 static void si_update_cg(struct radeon_device *rdev,
5622 			 u32 block, bool enable)
5623 {
5624 	if (block & RADEON_CG_BLOCK_GFX) {
5625 		si_enable_gui_idle_interrupt(rdev, false);
5626 		/* order matters! */
5627 		if (enable) {
5628 			si_enable_mgcg(rdev, true);
5629 			si_enable_cgcg(rdev, true);
5630 		} else {
5631 			si_enable_cgcg(rdev, false);
5632 			si_enable_mgcg(rdev, false);
5633 		}
5634 		si_enable_gui_idle_interrupt(rdev, true);
5635 	}
5636 
5637 	if (block & RADEON_CG_BLOCK_MC) {
5638 		si_enable_mc_mgcg(rdev, enable);
5639 		si_enable_mc_ls(rdev, enable);
5640 	}
5641 
5642 	if (block & RADEON_CG_BLOCK_SDMA) {
5643 		si_enable_dma_mgcg(rdev, enable);
5644 	}
5645 
5646 	if (block & RADEON_CG_BLOCK_BIF) {
5647 		si_enable_bif_mgls(rdev, enable);
5648 	}
5649 
5650 	if (block & RADEON_CG_BLOCK_UVD) {
5651 		if (rdev->has_uvd) {
5652 			si_enable_uvd_mgcg(rdev, enable);
5653 		}
5654 	}
5655 
5656 	if (block & RADEON_CG_BLOCK_HDP) {
5657 		si_enable_hdp_mgcg(rdev, enable);
5658 		si_enable_hdp_ls(rdev, enable);
5659 	}
5660 }
5661 
5662 static void si_init_cg(struct radeon_device *rdev)
5663 {
5664 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5665 			    RADEON_CG_BLOCK_MC |
5666 			    RADEON_CG_BLOCK_SDMA |
5667 			    RADEON_CG_BLOCK_BIF |
5668 			    RADEON_CG_BLOCK_HDP), true);
5669 	if (rdev->has_uvd) {
5670 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5671 		si_init_uvd_internal_cg(rdev);
5672 	}
5673 }
5674 
5675 static void si_fini_cg(struct radeon_device *rdev)
5676 {
5677 	if (rdev->has_uvd) {
5678 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5679 	}
5680 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5681 			    RADEON_CG_BLOCK_MC |
5682 			    RADEON_CG_BLOCK_SDMA |
5683 			    RADEON_CG_BLOCK_BIF |
5684 			    RADEON_CG_BLOCK_HDP), false);
5685 }
5686 
5687 u32 si_get_csb_size(struct radeon_device *rdev)
5688 {
5689 	u32 count = 0;
5690 	const struct cs_section_def *sect = NULL;
5691 	const struct cs_extent_def *ext = NULL;
5692 
5693 	if (rdev->rlc.cs_data == NULL)
5694 		return 0;
5695 
5696 	/* begin clear state */
5697 	count += 2;
5698 	/* context control state */
5699 	count += 3;
5700 
5701 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5702 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5703 			if (sect->id == SECT_CONTEXT)
5704 				count += 2 + ext->reg_count;
5705 			else
5706 				return 0;
5707 		}
5708 	}
5709 	/* pa_sc_raster_config */
5710 	count += 3;
5711 	/* end clear state */
5712 	count += 2;
5713 	/* clear state */
5714 	count += 2;
5715 
5716 	return count;
5717 }
5718 
5719 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5720 {
5721 	u32 count = 0, i;
5722 	const struct cs_section_def *sect = NULL;
5723 	const struct cs_extent_def *ext = NULL;
5724 
5725 	if (rdev->rlc.cs_data == NULL)
5726 		return;
5727 	if (buffer == NULL)
5728 		return;
5729 
5730 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5731 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5732 
5733 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5734 	buffer[count++] = cpu_to_le32(0x80000000);
5735 	buffer[count++] = cpu_to_le32(0x80000000);
5736 
5737 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5738 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5739 			if (sect->id == SECT_CONTEXT) {
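				/* SET_CONTEXT_REG offsets are relative to dword
				 * 0xa000 (byte 0x28000), hence the subtraction */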
5740 				buffer[count++] =
5741 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5742 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5743 				for (i = 0; i < ext->reg_count; i++)
5744 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5745 			} else {
5746 				return;
5747 			}
5748 		}
5749 	}
5750 
5751 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5752 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5753 	switch (rdev->family) {
5754 	case CHIP_TAHITI:
5755 	case CHIP_PITCAIRN:
5756 		buffer[count++] = cpu_to_le32(0x2a00126a);
5757 		break;
5758 	case CHIP_VERDE:
5759 		buffer[count++] = cpu_to_le32(0x0000124a);
5760 		break;
5761 	case CHIP_OLAND:
5762 		buffer[count++] = cpu_to_le32(0x00000082);
5763 		break;
5764 	case CHIP_HAINAN:
5765 		buffer[count++] = cpu_to_le32(0x00000000);
5766 		break;
5767 	default:
5768 		buffer[count++] = cpu_to_le32(0x00000000);
5769 		break;
5770 	}
5771 
5772 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5773 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5774 
5775 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5776 	buffer[count++] = cpu_to_le32(0);
5777 }
5778 
5779 static void si_init_pg(struct radeon_device *rdev)
5780 {
5781 	if (rdev->pg_flags) {
5782 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5783 			si_init_dma_pg(rdev);
5784 		}
5785 		si_init_ao_cu_mask(rdev);
5786 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5787 			si_init_gfx_cgpg(rdev);
5788 		} else {
5789 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5790 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5791 		}
5792 		si_enable_dma_pg(rdev, true);
5793 		si_enable_gfx_cgpg(rdev, true);
5794 	} else {
5795 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5796 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5797 	}
5798 }
5799 
5800 static void si_fini_pg(struct radeon_device *rdev)
5801 {
5802 	if (rdev->pg_flags) {
5803 		si_enable_dma_pg(rdev, false);
5804 		si_enable_gfx_cgpg(rdev, false);
5805 	}
5806 }
5807 
5808 /*
5809  * RLC
5810  */
5811 void si_rlc_reset(struct radeon_device *rdev)
5812 {
5813 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5814 
5815 	tmp |= SOFT_RESET_RLC;
5816 	WREG32(GRBM_SOFT_RESET, tmp);
5817 	udelay(50);
5818 	tmp &= ~SOFT_RESET_RLC;
5819 	WREG32(GRBM_SOFT_RESET, tmp);
5820 	udelay(50);
5821 }
5822 
5823 static void si_rlc_stop(struct radeon_device *rdev)
5824 {
5825 	WREG32(RLC_CNTL, 0);
5826 
5827 	si_enable_gui_idle_interrupt(rdev, false);
5828 
5829 	si_wait_for_rlc_serdes(rdev);
5830 }
5831 
5832 static void si_rlc_start(struct radeon_device *rdev)
5833 {
5834 	WREG32(RLC_CNTL, RLC_ENABLE);
5835 
5836 	si_enable_gui_idle_interrupt(rdev, true);
5837 
5838 	udelay(50);
5839 }
5840 
5841 static bool si_lbpw_supported(struct radeon_device *rdev)
5842 {
5843 	u32 tmp;
5844 
5845 	/* Enable LBPW only for DDR3 */
5846 	tmp = RREG32(MC_SEQ_MISC0);
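	/* bits [31:28] of MC_SEQ_MISC0 encode the memory type; 0xB is
	 * presumably the DDR3 type id, matching the comment above */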
5847 	if ((tmp & 0xF0000000) == 0xB0000000)
5848 		return true;
5849 	return false;
5850 }
5851 
5852 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5853 {
5854 	u32 tmp;
5855 
5856 	tmp = RREG32(RLC_LB_CNTL);
5857 	if (enable)
5858 		tmp |= LOAD_BALANCE_ENABLE;
5859 	else
5860 		tmp &= ~LOAD_BALANCE_ENABLE;
5861 	WREG32(RLC_LB_CNTL, tmp);
5862 
5863 	if (!enable) {
5864 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5865 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5866 	}
5867 }
5868 
5869 static int si_rlc_resume(struct radeon_device *rdev)
5870 {
5871 	u32 i;
5872 
5873 	if (!rdev->rlc_fw)
5874 		return -EINVAL;
5875 
5876 	si_rlc_stop(rdev);
5877 
5878 	si_rlc_reset(rdev);
5879 
5880 	si_init_pg(rdev);
5881 
5882 	si_init_cg(rdev);
5883 
5884 	WREG32(RLC_RL_BASE, 0);
5885 	WREG32(RLC_RL_SIZE, 0);
5886 	WREG32(RLC_LB_CNTL, 0);
5887 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5888 	WREG32(RLC_LB_CNTR_INIT, 0);
5889 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5890 
5891 	WREG32(RLC_MC_CNTL, 0);
5892 	WREG32(RLC_UCODE_CNTL, 0);
5893 
5894 	if (rdev->new_fw) {
5895 		const struct rlc_firmware_header_v1_0 *hdr =
5896 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5897 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5898 		const __le32 *fw_data = (const __le32 *)
5899 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5900 
5901 		radeon_ucode_print_rlc_hdr(&hdr->header);
5902 
5903 		for (i = 0; i < fw_size; i++) {
5904 			WREG32(RLC_UCODE_ADDR, i);
5905 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5906 		}
5907 	} else {
5908 		const __be32 *fw_data =
5909 			(const __be32 *)rdev->rlc_fw->data;
5910 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5911 			WREG32(RLC_UCODE_ADDR, i);
5912 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5913 		}
5914 	}
5915 	WREG32(RLC_UCODE_ADDR, 0);
5916 
5917 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5918 
5919 	si_rlc_start(rdev);
5920 
5921 	return 0;
5922 }
5923 
5924 static void si_enable_interrupts(struct radeon_device *rdev)
5925 {
5926 	u32 ih_cntl = RREG32(IH_CNTL);
5927 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5928 
5929 	ih_cntl |= ENABLE_INTR;
5930 	ih_rb_cntl |= IH_RB_ENABLE;
5931 	WREG32(IH_CNTL, ih_cntl);
5932 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5933 	rdev->ih.enabled = true;
5934 }
5935 
5936 static void si_disable_interrupts(struct radeon_device *rdev)
5937 {
5938 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5939 	u32 ih_cntl = RREG32(IH_CNTL);
5940 
5941 	ih_rb_cntl &= ~IH_RB_ENABLE;
5942 	ih_cntl &= ~ENABLE_INTR;
5943 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5944 	WREG32(IH_CNTL, ih_cntl);
5945 	/* set rptr, wptr to 0 */
5946 	WREG32(IH_RB_RPTR, 0);
5947 	WREG32(IH_RB_WPTR, 0);
5948 	rdev->ih.enabled = false;
5949 	rdev->ih.rptr = 0;
5950 }
5951 
5952 static void si_disable_interrupt_state(struct radeon_device *rdev)
5953 {
5954 	int i;
5955 	u32 tmp;
5956 
5957 	tmp = RREG32(CP_INT_CNTL_RING0) &
5958 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5959 	WREG32(CP_INT_CNTL_RING0, tmp);
5960 	WREG32(CP_INT_CNTL_RING1, 0);
5961 	WREG32(CP_INT_CNTL_RING2, 0);
5962 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5963 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5964 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5965 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5966 	WREG32(GRBM_INT_CNTL, 0);
5967 	WREG32(SRBM_INT_CNTL, 0);
5968 	for (i = 0; i < rdev->num_crtc; i++)
5969 		WREG32(INT_MASK + crtc_offsets[i], 0);
5970 	for (i = 0; i < rdev->num_crtc; i++)
5971 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5972 
5973 	if (!ASIC_IS_NODCE(rdev)) {
5974 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5975 
5976 		for (i = 0; i < 6; i++)
5977 			WREG32_AND(DC_HPDx_INT_CONTROL(i),
5978 				   DC_HPDx_INT_POLARITY);
5979 	}
5980 }
5981 
5982 static int si_irq_init(struct radeon_device *rdev)
5983 {
5984 	int ret = 0;
5985 	int rb_bufsz;
5986 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5987 
5988 	/* allocate ring */
5989 	ret = r600_ih_ring_alloc(rdev);
5990 	if (ret)
5991 		return ret;
5992 
5993 	/* disable irqs */
5994 	si_disable_interrupts(rdev);
5995 
5996 	/* init rlc */
5997 	ret = si_rlc_resume(rdev);
5998 	if (ret) {
5999 		r600_ih_ring_fini(rdev);
6000 		return ret;
6001 	}
6002 
6003 	/* setup interrupt control */
6004 	/* set dummy read address to dummy page address */
6005 	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6006 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6007 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6008 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6009 	 */
6010 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6011 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6012 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6013 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6014 
6015 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6016 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
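	/* the ring size is programmed as log2 of the size in dwords and is
	 * placed in bits [5:1] of IH_RB_CNTL by the shift below */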
6017 
6018 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6019 		      IH_WPTR_OVERFLOW_CLEAR |
6020 		      (rb_bufsz << 1));
6021 
6022 	if (rdev->wb.enabled)
6023 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6024 
6025 	/* set the writeback address whether it's enabled or not */
6026 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6027 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6028 
6029 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6030 
6031 	/* set rptr, wptr to 0 */
6032 	WREG32(IH_RB_RPTR, 0);
6033 	WREG32(IH_RB_WPTR, 0);
6034 
6035 	/* Default settings for IH_CNTL (disabled at first) */
6036 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6037 	/* RPTR_REARM only works if msi's are enabled */
6038 	if (rdev->msi_enabled)
6039 		ih_cntl |= RPTR_REARM;
6040 	WREG32(IH_CNTL, ih_cntl);
6041 
6042 	/* force the active interrupt state to all disabled */
6043 	si_disable_interrupt_state(rdev);
6044 
6045 	pci_set_master(rdev->pdev);
6046 
6047 	/* enable irqs */
6048 	si_enable_interrupts(rdev);
6049 
6050 	return ret;
6051 }
6052 
6053 /* The order we write back each register here is important */
6054 int si_irq_set(struct radeon_device *rdev)
6055 {
6056 	int i;
6057 	u32 cp_int_cntl;
6058 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6059 	u32 grbm_int_cntl = 0;
6060 	u32 dma_cntl, dma_cntl1;
6061 	u32 thermal_int = 0;
6062 
6063 	if (!rdev->irq.installed) {
6064 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6065 		return -EINVAL;
6066 	}
6067 	/* don't enable anything if the ih is disabled */
6068 	if (!rdev->ih.enabled) {
6069 		si_disable_interrupts(rdev);
6070 		/* force the active interrupt state to all disabled */
6071 		si_disable_interrupt_state(rdev);
6072 		return 0;
6073 	}
6074 
6075 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6076 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6077 
6078 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6079 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6080 
6081 	thermal_int = RREG32(CG_THERMAL_INT) &
6082 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6083 
6084 	/* enable CP interrupts on all rings */
6085 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6086 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6087 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6088 	}
6089 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6090 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6091 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6092 	}
6093 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6094 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6095 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6096 	}
6097 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6098 		DRM_DEBUG("si_irq_set: sw int dma\n");
6099 		dma_cntl |= TRAP_ENABLE;
6100 	}
6101 
6102 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6103 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6104 		dma_cntl1 |= TRAP_ENABLE;
6105 	}
6106 
6107 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6108 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6109 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6110 
6111 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6112 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6113 
6114 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6115 
6116 	if (rdev->irq.dpm_thermal) {
6117 		DRM_DEBUG("dpm thermal\n");
6118 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6119 	}
6120 
6121 	for (i = 0; i < rdev->num_crtc; i++) {
6122 		radeon_irq_kms_set_irq_n_enabled(
6123 		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
6124 		    rdev->irq.crtc_vblank_int[i] ||
6125 		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
6126 	}
6127 
6128 	for (i = 0; i < rdev->num_crtc; i++)
6129 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);
6130 
6131 	if (!ASIC_IS_NODCE(rdev)) {
6132 		for (i = 0; i < 6; i++) {
6133 			radeon_irq_kms_set_irq_n_enabled(
6134 			    rdev, DC_HPDx_INT_CONTROL(i),
6135 			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
6136 			    rdev->irq.hpd[i], "HPD", i);
6137 		}
6138 	}
6139 
6140 	WREG32(CG_THERMAL_INT, thermal_int);
6141 
6142 	/* posting read */
6143 	RREG32(SRBM_STATUS);
6144 
6145 	return 0;
6146 }
6147 
6148 /* The order we write back each register here is important */
6149 static inline void si_irq_ack(struct radeon_device *rdev)
6150 {
6151 	int i, j;
6152 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6153 	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;
6154 
6155 	if (ASIC_IS_NODCE(rdev))
6156 		return;
6157 
6158 	for (i = 0; i < 6; i++) {
6159 		disp_int[i] = RREG32(si_disp_int_status[i]);
6160 		if (i < rdev->num_crtc)
6161 			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
6162 	}
6163 
6164 	/* We acknowledge the interrupt registers two CRTCs at a time */
6165 	for (i = 0; i < rdev->num_crtc; i += 2) {
6166 		for (j = i; j < (i + 2); j++) {
6167 			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
6168 				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
6169 				       GRPH_PFLIP_INT_CLEAR);
6170 		}
6171 
6172 		for (j = i; j < (i + 2); j++) {
6173 			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
6174 				WREG32(VBLANK_STATUS + crtc_offsets[j],
6175 				       VBLANK_ACK);
6176 			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
6177 				WREG32(VLINE_STATUS + crtc_offsets[j],
6178 				       VLINE_ACK);
6179 		}
6180 	}
6181 
6182 	for (i = 0; i < 6; i++) {
6183 		if (disp_int[i] & DC_HPD1_INTERRUPT)
6184 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
6185 	}
6186 
6187 	for (i = 0; i < 6; i++) {
6188 		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
6189 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
6190 	}
6191 }
6192 
6193 static void si_irq_disable(struct radeon_device *rdev)
6194 {
6195 	si_disable_interrupts(rdev);
6196 	/* Wait and acknowledge irq */
6197 	mdelay(1);
6198 	si_irq_ack(rdev);
6199 	si_disable_interrupt_state(rdev);
6200 }
6201 
6202 static void si_irq_suspend(struct radeon_device *rdev)
6203 {
6204 	si_irq_disable(rdev);
6205 	si_rlc_stop(rdev);
6206 }
6207 
6208 static void si_irq_fini(struct radeon_device *rdev)
6209 {
6210 	si_irq_suspend(rdev);
6211 	r600_ih_ring_fini(rdev);
6212 }
6213 
6214 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6215 {
6216 	u32 wptr, tmp;
6217 
6218 	if (rdev->wb.enabled)
6219 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6220 	else
6221 		wptr = RREG32(IH_RB_WPTR);
6222 
6223 	if (wptr & RB_OVERFLOW) {
6224 		wptr &= ~RB_OVERFLOW;
6225 		/* When a ring buffer overflow happens, start parsing interrupts
6226 		 * from the last vector that was not overwritten (wptr + 16).
6227 		 * Hopefully this allows us to catch up.
6228 		 */
6229 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6230 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6231 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6232 		tmp = RREG32(IH_RB_CNTL);
6233 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6234 		WREG32(IH_RB_CNTL, tmp);
6235 	}
6236 	return (wptr & rdev->ih.ptr_mask);
6237 }
6238 
6239 /*        SI IV Ring
6240  * Each IV ring entry is 128 bits:
6241  * [7:0]    - interrupt source id
6242  * [31:8]   - reserved
6243  * [59:32]  - interrupt source data
6244  * [63:60]  - reserved
6245  * [71:64]  - RINGID
6246  * [79:72]  - VMID
6247  * [127:80] - reserved
6248  */
6249 int si_irq_process(struct radeon_device *rdev)
6250 {
6251 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6252 	u32 crtc_idx, hpd_idx;
6253 	u32 mask;
6254 	u32 wptr;
6255 	u32 rptr;
6256 	u32 src_id, src_data, ring_id;
6257 	u32 ring_index;
6258 	bool queue_hotplug = false;
6259 	bool queue_dp = false;
6260 	bool queue_thermal = false;
6261 	u32 status, addr;
6262 	const char *event_name;
6263 
6264 	if (!rdev->ih.enabled || rdev->shutdown)
6265 		return IRQ_NONE;
6266 
6267 	wptr = si_get_ih_wptr(rdev);
6268 
6269 restart_ih:
6270 	/* is somebody else already processing irqs? */
6271 	if (atomic_xchg(&rdev->ih.lock, 1))
6272 		return IRQ_NONE;
6273 
6274 	rptr = rdev->ih.rptr;
6275 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6276 
6277 	/* Order reading of wptr vs. reading of IH ring data */
6278 	rmb();
6279 
6280 	/* display interrupts */
6281 	si_irq_ack(rdev);
6282 
6283 	while (rptr != wptr) {
6284 		/* wptr/rptr are in bytes! */
6285 		ring_index = rptr / 4;
6286 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6287 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6288 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
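		/* the masks above follow the IV entry layout: an 8-bit source
		 * id, 28 bits of source data and an 8-bit ring id */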
6289 
6290 		switch (src_id) {
6291 		case 1: /* D1 vblank/vline */
6292 		case 2: /* D2 vblank/vline */
6293 		case 3: /* D3 vblank/vline */
6294 		case 4: /* D4 vblank/vline */
6295 		case 5: /* D5 vblank/vline */
6296 		case 6: /* D6 vblank/vline */
6297 			crtc_idx = src_id - 1;
6298 
6299 			if (src_data == 0) { /* vblank */
6300 				mask = LB_D1_VBLANK_INTERRUPT;
6301 				event_name = "vblank";
6302 
6303 				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
6304 					drm_handle_vblank(rdev->ddev, crtc_idx);
6305 					rdev->pm.vblank_sync = true;
6306 					wake_up(&rdev->irq.vblank_queue);
6307 				}
6308 				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
6309 					radeon_crtc_handle_vblank(rdev,
6310 								  crtc_idx);
6311 				}
6312 
6313 			} else if (src_data == 1) { /* vline */
6314 				mask = LB_D1_VLINE_INTERRUPT;
6315 				event_name = "vline";
6316 			} else {
6317 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6318 					  src_id, src_data);
6319 				break;
6320 			}
6321 
6322 			if (!(disp_int[crtc_idx] & mask)) {
6323 				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
6324 					  crtc_idx + 1, event_name);
6325 			}
6326 
6327 			disp_int[crtc_idx] &= ~mask;
6328 			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);
6329 
6330 			break;
6331 		case 8: /* D1 page flip */
6332 		case 10: /* D2 page flip */
6333 		case 12: /* D3 page flip */
6334 		case 14: /* D4 page flip */
6335 		case 16: /* D5 page flip */
6336 		case 18: /* D6 page flip */
6337 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6338 			if (radeon_use_pflipirq > 0)
6339 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6340 			break;
6341 		case 42: /* HPD hotplug */
6342 			if (src_data <= 5) {
6343 				hpd_idx = src_data;
6344 				mask = DC_HPD1_INTERRUPT;
6345 				queue_hotplug = true;
6346 				event_name = "HPD";
6347 
6348 			} else if (src_data <= 11) {
6349 				hpd_idx = src_data - 6;
6350 				mask = DC_HPD1_RX_INTERRUPT;
6351 				queue_dp = true;
6352 				event_name = "HPD_RX";
6353 
6354 			} else {
6355 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6356 					  src_id, src_data);
6357 				break;
6358 			}
6359 
6360 			if (!(disp_int[hpd_idx] & mask))
6361 				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6362 
6363 			disp_int[hpd_idx] &= ~mask;
6364 			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
6365 			break;
6366 		case 96:
6367 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6368 			WREG32(SRBM_INT_ACK, 0x1);
6369 			break;
6370 		case 124: /* UVD */
6371 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6372 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6373 			break;
6374 		case 146:
6375 		case 147:
6376 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6377 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6378 			/* reset addr and status */
6379 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6380 			if (addr == 0x0 && status == 0x0)
6381 				break;
6382 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6383 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6384 				addr);
6385 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6386 				status);
6387 			si_vm_decode_fault(rdev, status, addr);
6388 			break;
6389 		case 176: /* RINGID0 CP_INT */
6390 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6391 			break;
6392 		case 177: /* RINGID1 CP_INT */
6393 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6394 			break;
6395 		case 178: /* RINGID2 CP_INT */
6396 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6397 			break;
6398 		case 181: /* CP EOP event */
6399 			DRM_DEBUG("IH: CP EOP\n");
6400 			switch (ring_id) {
6401 			case 0:
6402 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6403 				break;
6404 			case 1:
6405 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6406 				break;
6407 			case 2:
6408 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6409 				break;
6410 			}
6411 			break;
6412 		case 224: /* DMA trap event */
6413 			DRM_DEBUG("IH: DMA trap\n");
6414 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6415 			break;
6416 		case 230: /* thermal low to high */
6417 			DRM_DEBUG("IH: thermal low to high\n");
6418 			rdev->pm.dpm.thermal.high_to_low = false;
6419 			queue_thermal = true;
6420 			break;
6421 		case 231: /* thermal high to low */
6422 			DRM_DEBUG("IH: thermal high to low\n");
6423 			rdev->pm.dpm.thermal.high_to_low = true;
6424 			queue_thermal = true;
6425 			break;
6426 		case 233: /* GUI IDLE */
6427 			DRM_DEBUG("IH: GUI idle\n");
6428 			break;
6429 		case 244: /* DMA trap event */
6430 			DRM_DEBUG("IH: DMA1 trap\n");
6431 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6432 			break;
6433 		default:
6434 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6435 			break;
6436 		}
6437 
6438 		/* wptr/rptr are in bytes! */
6439 		rptr += 16;
6440 		rptr &= rdev->ih.ptr_mask;
6441 		WREG32(IH_RB_RPTR, rptr);
6442 	}
6443 	if (queue_dp)
6444 		schedule_work(&rdev->dp_work);
6445 	if (queue_hotplug)
6446 		schedule_delayed_work(&rdev->hotplug_work, 0);
6447 	if (queue_thermal && rdev->pm.dpm_enabled)
6448 		schedule_work(&rdev->pm.dpm.thermal.work);
6449 	rdev->ih.rptr = rptr;
6450 	atomic_set(&rdev->ih.lock, 0);
6451 
6452 	/* make sure wptr hasn't changed while processing */
6453 	wptr = si_get_ih_wptr(rdev);
6454 	if (wptr != rptr)
6455 		goto restart_ih;
6456 
6457 	return IRQ_HANDLED;
6458 }
6459 
6460 /*
6461  * startup/shutdown callbacks
6462  */
6463 static void si_uvd_init(struct radeon_device *rdev)
6464 {
6465 	int r;
6466 
6467 	if (!rdev->has_uvd)
6468 		return;
6469 
6470 	r = radeon_uvd_init(rdev);
6471 	if (r) {
6472 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6473 		/*
6474 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
6475 		 * uvd_v2_2_resume() fail early, so nothing happens there.
6476 		 * It is pointless to go through that code, which is why we
6477 		 * disable UVD here.
6478 		 */
6479 		rdev->has_uvd = false;
6480 		return;
6481 	}
6482 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6483 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6484 }
6485 
6486 static void si_uvd_start(struct radeon_device *rdev)
6487 {
6488 	int r;
6489 
6490 	if (!rdev->has_uvd)
6491 		return;
6492 
6493 	r = uvd_v2_2_resume(rdev);
6494 	if (r) {
6495 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6496 		goto error;
6497 	}
6498 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6499 	if (r) {
6500 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6501 		goto error;
6502 	}
6503 	return;
6504 
6505 error:
6506 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6507 }
6508 
6509 static void si_uvd_resume(struct radeon_device *rdev)
6510 {
6511 	struct radeon_ring *ring;
6512 	int r;
6513 
6514 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6515 		return;
6516 
6517 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6518 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6519 	if (r) {
6520 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6521 		return;
6522 	}
6523 	r = uvd_v1_0_init(rdev);
6524 	if (r) {
6525 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6526 		return;
6527 	}
6528 }
6529 
6530 static void si_vce_init(struct radeon_device *rdev)
6531 {
6532 	int r;
6533 
6534 	if (!rdev->has_vce)
6535 		return;
6536 
6537 	r = radeon_vce_init(rdev);
6538 	if (r) {
6539 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6540 		/*
6541 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
6542 		 * si_vce_start() fail early, so nothing happens there.
6543 		 * It is pointless to go through that code, which is why we
6544 		 * disable VCE here.
6545 		 */
6546 		rdev->has_vce = false;
6547 		return;
6548 	}
6549 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6550 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6551 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6552 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6553 }
6554 
6555 static void si_vce_start(struct radeon_device *rdev)
6556 {
6557 	int r;
6558 
6559 	if (!rdev->has_vce)
6560 		return;
6561 
6562 	r = radeon_vce_resume(rdev);
6563 	if (r) {
6564 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6565 		goto error;
6566 	}
6567 	r = vce_v1_0_resume(rdev);
6568 	if (r) {
6569 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6570 		goto error;
6571 	}
6572 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6573 	if (r) {
6574 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6575 		goto error;
6576 	}
6577 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6578 	if (r) {
6579 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6580 		goto error;
6581 	}
6582 	return;
6583 
6584 error:
6585 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6586 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6587 }
6588 
6589 static void si_vce_resume(struct radeon_device *rdev)
6590 {
6591 	struct radeon_ring *ring;
6592 	int r;
6593 
6594 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6595 		return;
6596 
6597 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6598 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6599 	if (r) {
6600 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6601 		return;
6602 	}
6603 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6604 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6605 	if (r) {
6606 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
6607 		return;
6608 	}
6609 	r = vce_v1_0_init(rdev);
6610 	if (r) {
6611 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6612 		return;
6613 	}
6614 }
6615 
6616 static int si_startup(struct radeon_device *rdev)
6617 {
6618 	struct radeon_ring *ring;
6619 	int r;
6620 
6621 	/* enable pcie gen2/3 link */
6622 	si_pcie_gen3_enable(rdev);
6623 	/* enable aspm */
6624 	si_program_aspm(rdev);
6625 
6626 	/* scratch needs to be initialized before MC */
6627 	r = r600_vram_scratch_init(rdev);
6628 	if (r)
6629 		return r;
6630 
6631 	si_mc_program(rdev);
6632 
6633 	if (!rdev->pm.dpm_enabled) {
6634 		r = si_mc_load_microcode(rdev);
6635 		if (r) {
6636 			DRM_ERROR("Failed to load MC firmware!\n");
6637 			return r;
6638 		}
6639 	}
6640 
6641 	r = si_pcie_gart_enable(rdev);
6642 	if (r)
6643 		return r;
6644 	si_gpu_init(rdev);
6645 
6646 	/* allocate rlc buffers */
6647 	if (rdev->family == CHIP_VERDE) {
6648 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6649 		rdev->rlc.reg_list_size =
6650 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6651 	}
6652 	rdev->rlc.cs_data = si_cs_data;
6653 	r = sumo_rlc_init(rdev);
6654 	if (r) {
6655 		DRM_ERROR("Failed to init rlc BOs!\n");
6656 		return r;
6657 	}
6658 
6659 	/* allocate wb buffer */
6660 	r = radeon_wb_init(rdev);
6661 	if (r)
6662 		return r;
6663 
6664 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6665 	if (r) {
6666 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6667 		return r;
6668 	}
6669 
6670 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6671 	if (r) {
6672 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6673 		return r;
6674 	}
6675 
6676 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6677 	if (r) {
6678 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6679 		return r;
6680 	}
6681 
6682 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6683 	if (r) {
6684 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6685 		return r;
6686 	}
6687 
6688 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6689 	if (r) {
6690 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6691 		return r;
6692 	}
6693 
6694 	si_uvd_start(rdev);
6695 	si_vce_start(rdev);
6696 
6697 	/* Enable IRQ */
6698 	if (!rdev->irq.installed) {
6699 		r = radeon_irq_kms_init(rdev);
6700 		if (r)
6701 			return r;
6702 	}
6703 
6704 	r = si_irq_init(rdev);
6705 	if (r) {
6706 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6707 		radeon_irq_kms_fini(rdev);
6708 		return r;
6709 	}
6710 	si_irq_set(rdev);
6711 
6712 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6713 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6714 			     RADEON_CP_PACKET2);
6715 	if (r)
6716 		return r;
6717 
6718 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6719 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6720 			     RADEON_CP_PACKET2);
6721 	if (r)
6722 		return r;
6723 
6724 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6725 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6726 			     RADEON_CP_PACKET2);
6727 	if (r)
6728 		return r;
6729 
6730 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6731 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6732 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6733 	if (r)
6734 		return r;
6735 
6736 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6737 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6738 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6739 	if (r)
6740 		return r;
6741 
6742 	r = si_cp_load_microcode(rdev);
6743 	if (r)
6744 		return r;
6745 	r = si_cp_resume(rdev);
6746 	if (r)
6747 		return r;
6748 
6749 	r = cayman_dma_resume(rdev);
6750 	if (r)
6751 		return r;
6752 
6753 	si_uvd_resume(rdev);
6754 	si_vce_resume(rdev);
6755 
6756 	r = radeon_ib_pool_init(rdev);
6757 	if (r) {
6758 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6759 		return r;
6760 	}
6761 
6762 	r = radeon_vm_manager_init(rdev);
6763 	if (r) {
6764 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6765 		return r;
6766 	}
6767 
6768 	r = radeon_audio_init(rdev);
6769 	if (r)
6770 		return r;
6771 
6772 	return 0;
6773 }
6774 
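/**
 * si_resume - resume the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Re-posts the card via the atombios init tables, restores the golden
 * registers, resumes power management and re-runs si_startup() (SI).
 * Returns 0 on success, negative error code on failure.
 */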
6775 int si_resume(struct radeon_device *rdev)
6776 {
6777 	int r;
6778 
6779 	/* Do not reset the GPU before posting; on rv770 hw, unlike on r500
6780 	 * hw, posting performs the tasks necessary to bring the GPU back
6781 	 * into a good state.
6782 	 */
6783 	/* post card */
6784 	atom_asic_init(rdev->mode_info.atom_context);
6785 
6786 	/* init golden registers */
6787 	si_init_golden_registers(rdev);
6788 
6789 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6790 		radeon_pm_resume(rdev);
6791 
6792 	rdev->accel_working = true;
6793 	r = si_startup(rdev);
6794 	if (r) {
6795 		DRM_ERROR("si startup failed on resume\n");
6796 		rdev->accel_working = false;
6797 		return r;
6798 	}
6799 
6800 	return r;
6801 
6802 }
6803 
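/**
 * si_suspend - tear the hw down for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Roughly the reverse of si_startup(): stops the CP and DMA engines,
 * suspends UVD/VCE if present, and disables power/clock gating,
 * interrupts, writeback and the GART (SI).
 * Always returns 0.
 */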
6804 int si_suspend(struct radeon_device *rdev)
6805 {
6806 	radeon_pm_suspend(rdev);
6807 	radeon_audio_fini(rdev);
6808 	radeon_vm_manager_fini(rdev);
6809 	si_cp_enable(rdev, false);
6810 	cayman_dma_stop(rdev);
6811 	if (rdev->has_uvd) {
6812 		uvd_v1_0_fini(rdev);
6813 		radeon_uvd_suspend(rdev);
6814 	}
6815 	if (rdev->has_vce)
6816 		radeon_vce_suspend(rdev);
6817 	si_fini_pg(rdev);
6818 	si_fini_cg(rdev);
6819 	si_irq_suspend(rdev);
6820 	radeon_wb_disable(rdev);
6821 	si_pcie_gart_disable(rdev);
6822 	return 0;
6823 }
6824 
6825 /* The plan is to move initialization into this function and to use
6826  * helper functions so that radeon_device_init does pretty much
6827  * nothing more than call asic-specific functions. This should
6828  * also allow us to remove a bunch of callback functions,
6829  * like vram_info.
6830  */
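/**
 * si_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Load-time setup: BIOS fetch and posting, memory controller and BO
 * init, microcode load, ring allocation, and a first si_startup()
 * (SI).  Fails if the MC ucode is unavailable.
 * Returns 0 on success, negative error code on failure.
 */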
6831 int si_init(struct radeon_device *rdev)
6832 {
6833 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6834 	int r;
6835 
6836 	/* Read BIOS */
6837 	if (!radeon_get_bios(rdev)) {
6838 		if (ASIC_IS_AVIVO(rdev))
6839 			return -EINVAL;
6840 	}
6841 	/* Must be an ATOMBIOS */
6842 	if (!rdev->is_atom_bios) {
6843 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6844 		return -EINVAL;
6845 	}
6846 	r = radeon_atombios_init(rdev);
6847 	if (r)
6848 		return r;
6849 
6850 	/* Post card if necessary */
6851 	if (!radeon_card_posted(rdev)) {
6852 		if (!rdev->bios) {
6853 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6854 			return -EINVAL;
6855 		}
6856 		DRM_INFO("GPU not posted. posting now...\n");
6857 		atom_asic_init(rdev->mode_info.atom_context);
6858 	}
6859 	/* init golden registers */
6860 	si_init_golden_registers(rdev);
6861 	/* Initialize scratch registers */
6862 	si_scratch_init(rdev);
6863 	/* Initialize surface registers */
6864 	radeon_surface_init(rdev);
6865 	/* Initialize clocks */
6866 	radeon_get_clock_info(rdev->ddev);
6867 
6868 	/* Fence driver */
6869 	r = radeon_fence_driver_init(rdev);
6870 	if (r)
6871 		return r;
6872 
6873 	/* initialize memory controller */
6874 	r = si_mc_init(rdev);
6875 	if (r)
6876 		return r;
6877 	/* Memory manager */
6878 	r = radeon_bo_init(rdev);
6879 	if (r)
6880 		return r;
6881 
6882 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6883 	    !rdev->rlc_fw || !rdev->mc_fw) {
6884 		r = si_init_microcode(rdev);
6885 		if (r) {
6886 			DRM_ERROR("Failed to load firmware!\n");
6887 			return r;
6888 		}
6889 	}
6890 
6891 	/* Initialize power management */
6892 	radeon_pm_init(rdev);
6893 
6894 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6895 	ring->ring_obj = NULL;
6896 	r600_ring_init(rdev, ring, 1024 * 1024);
6897 
6898 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6899 	ring->ring_obj = NULL;
6900 	r600_ring_init(rdev, ring, 1024 * 1024);
6901 
6902 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6903 	ring->ring_obj = NULL;
6904 	r600_ring_init(rdev, ring, 1024 * 1024);
6905 
6906 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6907 	ring->ring_obj = NULL;
6908 	r600_ring_init(rdev, ring, 64 * 1024);
6909 
6910 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6911 	ring->ring_obj = NULL;
6912 	r600_ring_init(rdev, ring, 64 * 1024);
6913 
6914 	si_uvd_init(rdev);
6915 	si_vce_init(rdev);
6916 
6917 	rdev->ih.ring_obj = NULL;
6918 	r600_ih_ring_init(rdev, 64 * 1024);
6919 
6920 	r = r600_pcie_gart_init(rdev);
6921 	if (r)
6922 		return r;
6923 
6924 	rdev->accel_working = true;
6925 	r = si_startup(rdev);
6926 	if (r) {
6927 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6928 		si_cp_fini(rdev);
6929 		cayman_dma_fini(rdev);
6930 		si_irq_fini(rdev);
6931 		sumo_rlc_fini(rdev);
6932 		radeon_wb_fini(rdev);
6933 		radeon_ib_pool_fini(rdev);
6934 		radeon_vm_manager_fini(rdev);
6935 		radeon_irq_kms_fini(rdev);
6936 		si_pcie_gart_fini(rdev);
6937 		rdev->accel_working = false;
6938 	}
6939 
6940 	/* Don't start up if the MC ucode is missing.
6941 	 * The default clocks and voltages before the MC ucode
6942 	 * is loaded are not sufficient for advanced operations.
6943 	 */
6944 	if (!rdev->mc_fw) {
6945 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6946 		return -EINVAL;
6947 	}
6948 
6949 	return 0;
6950 }
6951 
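/**
 * si_fini - asic specific driver and hw teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything set up by si_init() and si_startup(): engines,
 * interrupts, memory management, UVD/VCE and the cached BIOS (SI).
 */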
6952 void si_fini(struct radeon_device *rdev)
6953 {
6954 	radeon_pm_fini(rdev);
6955 	si_cp_fini(rdev);
6956 	cayman_dma_fini(rdev);
6957 	si_fini_pg(rdev);
6958 	si_fini_cg(rdev);
6959 	si_irq_fini(rdev);
6960 	sumo_rlc_fini(rdev);
6961 	radeon_wb_fini(rdev);
6962 	radeon_vm_manager_fini(rdev);
6963 	radeon_ib_pool_fini(rdev);
6964 	radeon_irq_kms_fini(rdev);
6965 	if (rdev->has_uvd) {
6966 		uvd_v1_0_fini(rdev);
6967 		radeon_uvd_fini(rdev);
6968 	}
6969 	if (rdev->has_vce)
6970 		radeon_vce_fini(rdev);
6971 	si_pcie_gart_fini(rdev);
6972 	r600_vram_scratch_fini(rdev);
6973 	radeon_gem_fini(rdev);
6974 	radeon_fence_driver_fini(rdev);
6975 	radeon_bo_fini(rdev);
6976 	radeon_atombios_fini(rdev);
6977 	kfree(rdev->bios);
6978 	rdev->bios = NULL;
6979 }
6980 
6981 /**
6982  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6983  *
6984  * @rdev: radeon_device pointer
6985  *
6986  * Fetches a GPU clock counter snapshot (SI).
6987  * Returns the 64 bit clock counter snapshot.
6988  */
6989 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6990 {
6991 	uint64_t clock;
6992 
6993 	mutex_lock(&rdev->gpu_clock_mutex);
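	/* writing 1 to the capture register presumably latches the full
	 * 64-bit counter so the two 32-bit reads below are coherent; the
	 * mutex serializes users of that shared latch
	 */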
6994 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6995 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6996 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6997 	mutex_unlock(&rdev->gpu_clock_mutex);
6998 	return clock;
6999 }
7000 
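/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK, 0 to leave the PLL in bypass
 * @dclk: requested DCLK, 0 to leave the PLL in bypass
 *
 * Switches VCLK/DCLK to the bypass clock, reprograms the UPLL feedback
 * and post dividers, and switches the source selects back once the
 * PLL has settled (SI).
 * Returns 0 on success, negative error code on failure.
 */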
7001 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7002 {
7003 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7004 	int r;
7005 
7006 	/* bypass vclk and dclk with bclk */
7007 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7008 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7009 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7010 
7011 	/* put PLL in bypass mode */
7012 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7013 
7014 	if (!vclk || !dclk) {
7015 		/* keep the Bypass mode */
7016 		return 0;
7017 	}
7018 
7019 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7020 					  16384, 0x03FFFFFF, 0, 128, 5,
7021 					  &fb_div, &vclk_div, &dclk_div);
7022 	if (r)
7023 		return r;
7024 
7025 	/* set RESET_ANTI_MUX to 0 */
7026 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7027 
7028 	/* set VCO_MODE to 1 */
7029 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7030 
7031 	/* disable sleep mode */
7032 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7033 
7034 	/* deassert UPLL_RESET */
7035 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7036 
7037 	mdelay(1);
7038 
7039 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7040 	if (r)
7041 		return r;
7042 
7043 	/* assert UPLL_RESET again */
7044 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7045 
7046 	/* disable spread spectrum. */
7047 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7048 
7049 	/* set feedback divider */
7050 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7051 
7052 	/* set ref divider to 0 */
7053 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7054 
7055 	if (fb_div < 307200)
7056 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7057 	else
7058 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7059 
7060 	/* set PDIV_A and PDIV_B */
7061 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7062 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7063 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7064 
7065 	/* give the PLL some time to settle */
7066 	mdelay(15);
7067 
7068 	/* deassert PLL_RESET */
7069 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7070 
7071 	mdelay(15);
7072 
7073 	/* switch from bypass mode to normal mode */
7074 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7075 
7076 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7077 	if (r)
7078 		return r;
7079 
7080 	/* switch VCLK and DCLK selection */
7081 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7082 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7083 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7084 
7085 	mdelay(100);
7086 
7087 	return 0;
7088 }
7089 
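/**
 * si_pcie_gen3_enable - try to raise the PCIE link speed
 *
 * @rdev: radeon_device pointer
 *
 * If the root port supports 5.0GT/s or 8.0GT/s and the link is not
 * already running at that rate, retrains the link, including the gen3
 * equalization sequence, and requests the higher target link speed.
 * Disabled with radeon.pcie_gen2=0 (SI).
 */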
7090 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7091 {
7092 	struct pci_dev *root = rdev->pdev->bus->self;
7093 	enum pci_bus_speed speed_cap;
7094 	u32 speed_cntl, current_data_rate;
7095 	int i;
7096 	u16 tmp16;
7097 
7098 	if (pci_is_root_bus(rdev->pdev->bus))
7099 		return;
7100 
7101 	if (radeon_pcie_gen2 == 0)
7102 		return;
7103 
7104 	if (rdev->flags & RADEON_IS_IGP)
7105 		return;
7106 
7107 	if (!(rdev->flags & RADEON_IS_PCIE))
7108 		return;
7109 
7110 	speed_cap = pcie_get_speed_cap(root);
7111 	if (speed_cap == PCI_SPEED_UNKNOWN)
7112 		return;
7113 
7114 	if ((speed_cap != PCIE_SPEED_8_0GT) &&
7115 	    (speed_cap != PCIE_SPEED_5_0GT))
7116 		return;
7117 
7118 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7119 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7120 		LC_CURRENT_DATA_RATE_SHIFT;
7121 	if (speed_cap == PCIE_SPEED_8_0GT) {
7122 		if (current_data_rate == 2) {
7123 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7124 			return;
7125 		}
7126 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7127 	} else if (speed_cap == PCIE_SPEED_5_0GT) {
7128 		if (current_data_rate == 1) {
7129 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7130 			return;
7131 		}
7132 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7133 	}
7134 
7135 	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
7136 		return;
7137 
7138 	if (speed_cap == PCIE_SPEED_8_0GT) {
7139 		/* re-try equalization if gen3 is not already enabled */
7140 		if (current_data_rate != 2) {
7141 			u16 bridge_cfg, gpu_cfg;
7142 			u16 bridge_cfg2, gpu_cfg2;
7143 			u32 max_lw, current_lw, tmp;
7144 
7145 			pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
7146 			pcie_capability_set_word(rdev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
7147 
7148 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7149 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7150 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7151 
7152 			if (current_lw < max_lw) {
7153 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7154 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7155 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7156 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7157 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7158 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7159 				}
7160 			}
7161 
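			/* retry equalization up to 10 times: save the bridge and
			 * GPU link control state, quiesce the link and redo EQ,
			 * then restore the saved HAWD and compliance/margin bits
			 * on both ends of the link
			 */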
7162 			for (i = 0; i < 10; i++) {
7163 				/* check status */
7164 				pcie_capability_read_word(rdev->pdev,
7165 							  PCI_EXP_DEVSTA,
7166 							  &tmp16);
7167 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7168 					break;
7169 
7170 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7171 							  &bridge_cfg);
7172 				pcie_capability_read_word(rdev->pdev,
7173 							  PCI_EXP_LNKCTL,
7174 							  &gpu_cfg);
7175 
7176 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7177 							  &bridge_cfg2);
7178 				pcie_capability_read_word(rdev->pdev,
7179 							  PCI_EXP_LNKCTL2,
7180 							  &gpu_cfg2);
7181 
7182 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7183 				tmp |= LC_SET_QUIESCE;
7184 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7185 
7186 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7187 				tmp |= LC_REDO_EQ;
7188 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7189 
7190 				msleep(100);
7191 
7192 				/* linkctl */
7193 				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
7194 								   PCI_EXP_LNKCTL_HAWD,
7195 								   bridge_cfg &
7196 								   PCI_EXP_LNKCTL_HAWD);
7197 				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL,
7198 								   PCI_EXP_LNKCTL_HAWD,
7199 								   gpu_cfg &
7200 								   PCI_EXP_LNKCTL_HAWD);
7201 
7202 				/* linkctl2 */
7203 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7204 							  &tmp16);
7205 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7206 					   PCI_EXP_LNKCTL2_TX_MARGIN);
7207 				tmp16 |= (bridge_cfg2 &
7208 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
7209 					   PCI_EXP_LNKCTL2_TX_MARGIN));
7210 				pcie_capability_write_word(root,
7211 							   PCI_EXP_LNKCTL2,
7212 							   tmp16);
7213 
7214 				pcie_capability_read_word(rdev->pdev,
7215 							  PCI_EXP_LNKCTL2,
7216 							  &tmp16);
7217 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7218 					   PCI_EXP_LNKCTL2_TX_MARGIN);
7219 				tmp16 |= (gpu_cfg2 &
7220 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
7221 					   PCI_EXP_LNKCTL2_TX_MARGIN));
7222 				pcie_capability_write_word(rdev->pdev,
7223 							   PCI_EXP_LNKCTL2,
7224 							   tmp16);
7225 
7226 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7227 				tmp &= ~LC_SET_QUIESCE;
7228 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7229 			}
7230 		}
7231 	}
7232 
7233 	/* set the link speed */
7234 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7235 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7236 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7237 
7238 	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
7239 	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
7240 	if (speed_cap == PCIE_SPEED_8_0GT)
7241 		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
7242 	else if (speed_cap == PCIE_SPEED_5_0GT)
7243 		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
7244 	else
7245 		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
7246 	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
7247 
7248 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7249 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7250 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7251 
7252 	for (i = 0; i < rdev->usec_timeout; i++) {
7253 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7254 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7255 			break;
7256 		udelay(1);
7257 	}
7258 }
7259 
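/**
 * si_program_aspm - configure PCIE ASPM related behaviour
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 inactivity timers, PLL powerdown in L1 and,
 * when the upstream bridge advertises clock PM, the CLKREQ#-based
 * clock switching.  Disabled with radeon.aspm=0 (SI).
 */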
7260 static void si_program_aspm(struct radeon_device *rdev)
7261 {
7262 	u32 data, orig;
7263 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7264 	bool disable_clkreq = false;
7265 
7266 	if (radeon_aspm == 0)
7267 		return;
7268 
7269 	if (!(rdev->flags & RADEON_IS_PCIE))
7270 		return;
7271 
7272 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7273 	data &= ~LC_XMIT_N_FTS_MASK;
7274 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7275 	if (orig != data)
7276 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7277 
7278 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7279 	data |= LC_GO_TO_RECOVERY;
7280 	if (orig != data)
7281 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7282 
7283 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7284 	data |= P_IGNORE_EDB_ERR;
7285 	if (orig != data)
7286 		WREG32_PCIE(PCIE_P_CNTL, data);
7287 
7288 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7289 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7290 	data |= LC_PMI_TO_L1_DIS;
7291 	if (!disable_l0s)
7292 		data |= LC_L0S_INACTIVITY(7);
7293 
7294 	if (!disable_l1) {
7295 		data |= LC_L1_INACTIVITY(7);
7296 		data &= ~LC_PMI_TO_L1_DIS;
7297 		if (orig != data)
7298 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7299 
7300 		if (!disable_plloff_in_l1) {
7301 			bool clk_req_support;
7302 
7303 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7304 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7305 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7306 			if (orig != data)
7307 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7308 
7309 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7310 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7311 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7312 			if (orig != data)
7313 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7314 
7315 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7316 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7317 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7318 			if (orig != data)
7319 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7320 
7321 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7322 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7323 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7324 			if (orig != data)
7325 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7326 
7327 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7328 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7329 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7330 				if (orig != data)
7331 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7332 
7333 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7334 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7335 				if (orig != data)
7336 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7337 
7338 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7339 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7340 				if (orig != data)
7341 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7342 
7343 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7344 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7345 				if (orig != data)
7346 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7347 
7348 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7349 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7350 				if (orig != data)
7351 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7352 
7353 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7354 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7355 				if (orig != data)
7356 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7357 
7358 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7359 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7360 				if (orig != data)
7361 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7362 
7363 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7364 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7365 				if (orig != data)
7366 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7367 			}
7368 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7369 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7370 			data |= LC_DYN_LANES_PWR_STATE(3);
7371 			if (orig != data)
7372 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7373 
7374 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7375 			data &= ~LS2_EXIT_TIME_MASK;
7376 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7377 				data |= LS2_EXIT_TIME(5);
7378 			if (orig != data)
7379 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7380 
7381 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7382 			data &= ~LS2_EXIT_TIME_MASK;
7383 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7384 				data |= LS2_EXIT_TIME(5);
7385 			if (orig != data)
7386 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7387 
7388 			if (!disable_clkreq &&
7389 			    !pci_is_root_bus(rdev->pdev->bus)) {
7390 				struct pci_dev *root = rdev->pdev->bus->self;
7391 				u32 lnkcap;
7392 
7393 				clk_req_support = false;
7394 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7395 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7396 					clk_req_support = true;
7397 			} else {
7398 				clk_req_support = false;
7399 			}
7400 
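			/* with CLKREQ# usable, allow PLL powerdown in L1/L23 and
			 * reparent the thermal monitor, deep sleep and bypass
			 * clocks, presumably so they keep running while the PCIE
			 * reference clock is gated
			 */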
7401 			if (clk_req_support) {
7402 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7403 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7404 				if (orig != data)
7405 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7406 
7407 				orig = data = RREG32(THM_CLK_CNTL);
7408 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7409 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7410 				if (orig != data)
7411 					WREG32(THM_CLK_CNTL, data);
7412 
7413 				orig = data = RREG32(MISC_CLK_CNTL);
7414 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7415 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7416 				if (orig != data)
7417 					WREG32(MISC_CLK_CNTL, data);
7418 
7419 				orig = data = RREG32(CG_CLKPIN_CNTL);
7420 				data &= ~BCLK_AS_XCLK;
7421 				if (orig != data)
7422 					WREG32(CG_CLKPIN_CNTL, data);
7423 
7424 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7425 				data &= ~FORCE_BIF_REFCLK_EN;
7426 				if (orig != data)
7427 					WREG32(CG_CLKPIN_CNTL_2, data);
7428 
7429 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7430 				data &= ~MPLL_CLKOUT_SEL_MASK;
7431 				data |= MPLL_CLKOUT_SEL(4);
7432 				if (orig != data)
7433 					WREG32(MPLL_BYPASSCLK_SEL, data);
7434 
7435 				orig = data = RREG32(SPLL_CNTL_MODE);
7436 				data &= ~SPLL_REFCLK_SEL_MASK;
7437 				if (orig != data)
7438 					WREG32(SPLL_CNTL_MODE, data);
7439 			}
7440 		}
7441 	} else {
7442 		if (orig != data)
7443 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7444 	}
7445 
7446 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7447 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7448 	if (orig != data)
7449 		WREG32_PCIE(PCIE_CNTL2, data);
7450 
7451 	if (!disable_l0s) {
7452 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7453 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7454 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7455 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7456 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7457 				data &= ~LC_L0S_INACTIVITY_MASK;
7458 				if (orig != data)
7459 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7460 			}
7461 		}
7462 	}
7463 }
7464 
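/**
 * si_vce_send_vcepll_ctlreq - issue a VCEPLL control request
 *
 * @rdev: radeon_device pointer
 *
 * Asserts the CTLREQ handshake bit and polls CTLACK/CTLACK2 for up to
 * about a second before deasserting the request again (SI).
 * Returns 0 on success, -ETIMEDOUT if the acks never arrive.
 */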
7465 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7466 {
7467 	unsigned i;
7468 
7469 	/* make sure VCEPLL_CTLREQ is deasserted */
7470 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7471 
7472 	mdelay(10);
7473 
7474 	/* assert VCEPLL_CTLREQ */
7475 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7476 
7477 	/* wait for CTLACK and CTLACK2 to get asserted */
7478 	for (i = 0; i < 100; ++i) {
7479 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7480 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7481 			break;
7482 		mdelay(10);
7483 	}
7484 
7485 	/* deassert VCEPLL_CTLREQ */
7486 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7487 
7488 	if (i == 100) {
7489 		DRM_ERROR("Timeout setting VCE clocks!\n");
7490 		return -ETIMEDOUT;
7491 	}
7492 
7493 	return 0;
7494 }
7495 
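/**
 * si_set_vce_clocks - program the VCEPLL for the requested VCE clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested EVCLK, 0 to leave the PLL in bypass
 * @ecclk: requested ECCLK, 0 to leave the PLL in bypass
 *
 * The VCE counterpart of si_set_uvd_clocks(): bypasses the clocks,
 * reprograms the VCEPLL dividers via the SMC indirect registers, and
 * switches the source selects back once the PLL has settled (SI).
 * Returns 0 on success, negative error code on failure.
 */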
7496 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7497 {
7498 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7499 	int r;
7500 
7501 	/* bypass evclk and ecclk with bclk */
7502 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7503 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7504 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7505 
7506 	/* put PLL in bypass mode */
7507 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7508 		     ~VCEPLL_BYPASS_EN_MASK);
7509 
7510 	if (!evclk || !ecclk) {
7511 		/* keep the Bypass mode, put PLL to sleep */
7512 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7513 			     ~VCEPLL_SLEEP_MASK);
7514 		return 0;
7515 	}
7516 
7517 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7518 					  16384, 0x03FFFFFF, 0, 128, 5,
7519 					  &fb_div, &evclk_div, &ecclk_div);
7520 	if (r)
7521 		return r;
7522 
7523 	/* set RESET_ANTI_MUX to 0 */
7524 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7525 
7526 	/* set VCO_MODE to 1 */
7527 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7528 		     ~VCEPLL_VCO_MODE_MASK);
7529 
7530 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7531 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7532 		     ~VCEPLL_SLEEP_MASK);
7533 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7534 
7535 	/* deassert VCEPLL_RESET */
7536 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7537 
7538 	mdelay(1);
7539 
7540 	r = si_vce_send_vcepll_ctlreq(rdev);
7541 	if (r)
7542 		return r;
7543 
7544 	/* assert VCEPLL_RESET again */
7545 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7546 
7547 	/* disable spread spectrum. */
7548 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7549 
7550 	/* set feedback divider */
7551 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7552 
7553 	/* set ref divider to 0 */
7554 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7555 
7556 	/* set PDIV_A and PDIV_B */
7557 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7558 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7559 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7560 
7561 	/* give the PLL some time to settle */
7562 	mdelay(15);
7563 
7564 	/* deassert PLL_RESET */
7565 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7566 
7567 	mdelay(15);
7568 
7569 	/* switch from bypass mode to normal mode */
7570 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7571 
7572 	r = si_vce_send_vcepll_ctlreq(rdev);
7573 	if (r)
7574 		return r;
7575 
7576 	/* switch EVCLK and ECCLK selection */
7577 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7578 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7579 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7580 
7581 	mdelay(100);
7582 
7583 	return 0;
7584 }
7585