/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_discovery.h"
#include "soc15_hw_ip.h"
#include "discovery.h"

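/*
 * Register offsets needed before the discovery table has been parsed.
 * RCC_CONFIG_MEMSIZE reports the VRAM size in MB; the MM_INDEX /
 * MM_INDEX_HI / MM_DATA triplet is assumed to be the usual indirect
 * MMIO window into VRAM (the actual read below goes through
 * amdgpu_device_vram_access()).
 */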
#define mmRCC_CONFIG_MEMSIZE	0xde3
#define mmMM_INDEX		0x0
#define mmMM_INDEX_HI		0x6
#define mmMM_DATA		0x1
#define HW_ID_MAX		300

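/* Human-readable names for the discovery hardware IDs, used in debug output. */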
static const char *hw_id_names[HW_ID_MAX] = {
	[MP1_HWID] = "MP1",
	[MP2_HWID] = "MP2",
	[THM_HWID] = "THM",
	[SMUIO_HWID] = "SMUIO",
	[FUSE_HWID] = "FUSE",
	[CLKA_HWID] = "CLKA",
	[PWR_HWID] = "PWR",
	[GC_HWID] = "GC",
	[UVD_HWID] = "UVD",
	[AUDIO_AZ_HWID] = "AUDIO_AZ",
	[ACP_HWID] = "ACP",
	[DCI_HWID] = "DCI",
	[DMU_HWID] = "DMU",
	[DCO_HWID] = "DCO",
	[DIO_HWID] = "DIO",
	[XDMA_HWID] = "XDMA",
	[DCEAZ_HWID] = "DCEAZ",
	[DAZ_HWID] = "DAZ",
	[SDPMUX_HWID] = "SDPMUX",
	[NTB_HWID] = "NTB",
	[IOHC_HWID] = "IOHC",
	[L2IMU_HWID] = "L2IMU",
	[VCE_HWID] = "VCE",
	[MMHUB_HWID] = "MMHUB",
	[ATHUB_HWID] = "ATHUB",
	[DBGU_NBIO_HWID] = "DBGU_NBIO",
	[DFX_HWID] = "DFX",
	[DBGU0_HWID] = "DBGU0",
	[DBGU1_HWID] = "DBGU1",
	[OSSSYS_HWID] = "OSSSYS",
	[HDP_HWID] = "HDP",
	[SDMA0_HWID] = "SDMA0",
	[SDMA1_HWID] = "SDMA1",
	[ISP_HWID] = "ISP",
	[DBGU_IO_HWID] = "DBGU_IO",
	[DF_HWID] = "DF",
	[CLKB_HWID] = "CLKB",
	[FCH_HWID] = "FCH",
	[DFX_DAP_HWID] = "DFX_DAP",
	[L1IMU_PCIE_HWID] = "L1IMU_PCIE",
	[L1IMU_NBIF_HWID] = "L1IMU_NBIF",
	[L1IMU_IOAGR_HWID] = "L1IMU_IOAGR",
	[L1IMU3_HWID] = "L1IMU3",
	[L1IMU4_HWID] = "L1IMU4",
	[L1IMU5_HWID] = "L1IMU5",
	[L1IMU6_HWID] = "L1IMU6",
	[L1IMU7_HWID] = "L1IMU7",
	[L1IMU8_HWID] = "L1IMU8",
	[L1IMU9_HWID] = "L1IMU9",
	[L1IMU10_HWID] = "L1IMU10",
	[L1IMU11_HWID] = "L1IMU11",
	[L1IMU12_HWID] = "L1IMU12",
	[L1IMU13_HWID] = "L1IMU13",
	[L1IMU14_HWID] = "L1IMU14",
	[L1IMU15_HWID] = "L1IMU15",
	[WAFLC_HWID] = "WAFLC",
	[FCH_USB_PD_HWID] = "FCH_USB_PD",
	[PCIE_HWID] = "PCIE",
	[PCS_HWID] = "PCS",
	[DDCL_HWID] = "DDCL",
	[SST_HWID] = "SST",
	[IOAGR_HWID] = "IOAGR",
	[NBIF_HWID] = "NBIF",
	[IOAPIC_HWID] = "IOAPIC",
	[SYSTEMHUB_HWID] = "SYSTEMHUB",
	[NTBCCP_HWID] = "NTBCCP",
	[UMC_HWID] = "UMC",
	[SATA_HWID] = "SATA",
	[USB_HWID] = "USB",
	[CCXSEC_HWID] = "CCXSEC",
	[XGMI_HWID] = "XGMI",
	[XGBE_HWID] = "XGBE",
	[MP0_HWID] = "MP0",
};

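/* Map the driver's HWIP enumeration onto the hardware IDs used by the discovery table. */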
static int hw_id_map[MAX_HWIP] = {
	[GC_HWIP] = GC_HWID,
	[HDP_HWIP] = HDP_HWID,
	[SDMA0_HWIP] = SDMA0_HWID,
	[SDMA1_HWIP] = SDMA1_HWID,
	[MMHUB_HWIP] = MMHUB_HWID,
	[ATHUB_HWIP] = ATHUB_HWID,
	[NBIO_HWIP] = NBIF_HWID,
	[MP0_HWIP] = MP0_HWID,
	[MP1_HWIP] = MP1_HWID,
	[UVD_HWIP] = UVD_HWID,
	[VCE_HWIP] = VCE_HWID,
	[DF_HWIP] = DF_HWID,
	[DCE_HWIP] = DMU_HWID,
	[OSSSYS_HWIP] = OSSSYS_HWID,
	[SMUIO_HWIP] = SMUIO_HWID,
	[PWR_HWIP] = PWR_HWID,
	[NBIF_HWIP] = NBIF_HWID,
	[THM_HWIP] = THM_HWID,
	[CLK_HWIP] = CLKA_HWID,
	[UMC_HWIP] = UMC_HWID,
};

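/*
 * Copy the discovery binary out of VRAM into @binary.  The binary sits
 * DISCOVERY_TMR_OFFSET bytes below the top of VRAM, and RCC_CONFIG_MEMSIZE
 * reports the VRAM size in MB, hence the shift by 20.
 */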
static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
{
	uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
	uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;

	amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
				  adev->mman.discovery_tmr_size, false);
	return 0;
}

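/* Byte-wise additive checksum over @size bytes of @data, as used by the discovery tables. */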
static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size)
{
	uint16_t checksum = 0;
	int i;

	for (i = 0; i < size; i++)
		checksum += data[i];

	return checksum;
}

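/* Compare the computed checksum of @data against the @expected value taken from a table header. */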
static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size,
						    uint16_t expected)
{
	return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
}

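/*
 * Read the discovery binary into adev->mman.discovery_bin and validate it:
 * check the binary signature, then verify the checksums of the whole binary,
 * the IP discovery table and the GC info table.  On failure the buffer is
 * freed again and an error code is returned.
 */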
static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
	struct table_info *info;
	struct binary_header *bhdr;
	struct ip_discovery_header *ihdr;
	struct gpu_info_header *ghdr;
	uint16_t offset;
	uint16_t size;
	uint16_t checksum;
	int r;

	adev->mman.discovery_tmr_size = DISCOVERY_TMR_SIZE;
	adev->mman.discovery_bin = kzalloc(adev->mman.discovery_tmr_size, GFP_KERNEL);
	if (!adev->mman.discovery_bin)
		return -ENOMEM;

	r = amdgpu_discovery_read_binary(adev, adev->mman.discovery_bin);
	if (r) {
		DRM_ERROR("failed to read ip discovery binary\n");
		goto out;
	}

	bhdr = (struct binary_header *)adev->mman.discovery_bin;

	if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) {
		DRM_ERROR("invalid ip discovery binary signature\n");
		r = -EINVAL;
		goto out;
	}

	offset = offsetof(struct binary_header, binary_checksum) +
		sizeof(bhdr->binary_checksum);
	size = le16_to_cpu(bhdr->binary_size) - offset;
	checksum = le16_to_cpu(bhdr->binary_checksum);

	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
					      size, checksum)) {
		DRM_ERROR("invalid ip discovery binary checksum\n");
		r = -EINVAL;
		goto out;
	}

	info = &bhdr->table_list[IP_DISCOVERY];
	offset = le16_to_cpu(info->offset);
	checksum = le16_to_cpu(info->checksum);
	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);

	if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
		DRM_ERROR("invalid ip discovery data table signature\n");
		r = -EINVAL;
		goto out;
	}

	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
					      le16_to_cpu(ihdr->size), checksum)) {
		DRM_ERROR("invalid ip discovery data table checksum\n");
		r = -EINVAL;
		goto out;
	}

	info = &bhdr->table_list[GC];
	offset = le16_to_cpu(info->offset);
	checksum = le16_to_cpu(info->checksum);
	ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset);

	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
					      le32_to_cpu(ghdr->size), checksum)) {
		DRM_ERROR("invalid gc data table checksum\n");
		r = -EINVAL;
		goto out;
	}

	return 0;

out:
	kfree(adev->mman.discovery_bin);
	adev->mman.discovery_bin = NULL;

	return r;
}

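/* Release the cached discovery binary. */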
void amdgpu_discovery_fini(struct amdgpu_device *adev)
{
	kfree(adev->mman.discovery_bin);
	adev->mman.discovery_bin = NULL;
}

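/*
 * Walk every die and every IP instance in the discovery table, convert each
 * IP's base addresses to CPU endianness in place, and record them in
 * adev->reg_offset[][] so that the rest of the driver can locate registers
 * per HWIP block.  VCN instances are counted here for the harvesting logic
 * below.
 */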
int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
{
	struct binary_header *bhdr;
	struct ip_discovery_header *ihdr;
	struct die_header *dhdr;
	struct ip *ip;
	uint16_t die_offset;
	uint16_t ip_offset;
	uint16_t num_dies;
	uint16_t num_ips;
	uint8_t num_base_address;
	int hw_ip;
	int i, j, k;
	int r;

	r = amdgpu_discovery_init(adev);
	if (r) {
		DRM_ERROR("amdgpu_discovery_init failed\n");
		return r;
	}

	bhdr = (struct binary_header *)adev->mman.discovery_bin;
	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
	num_dies = le16_to_cpu(ihdr->num_dies);

	DRM_DEBUG("number of dies: %d\n", num_dies);

	for (i = 0; i < num_dies; i++) {
		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
		dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
		num_ips = le16_to_cpu(dhdr->num_ips);
		ip_offset = die_offset + sizeof(*dhdr);

		if (le16_to_cpu(dhdr->die_id) != i) {
			DRM_ERROR("invalid die id %d, expected %d\n",
				  le16_to_cpu(dhdr->die_id), i);
			return -EINVAL;
		}

		DRM_DEBUG("number of hardware IPs on die%d: %d\n",
			  le16_to_cpu(dhdr->die_id), num_ips);

		for (j = 0; j < num_ips; j++) {
			ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
			num_base_address = ip->num_base_address;

			DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
				  hw_id_names[le16_to_cpu(ip->hw_id)],
				  le16_to_cpu(ip->hw_id),
				  ip->number_instance,
				  ip->major, ip->minor,
				  ip->revision);

			if (le16_to_cpu(ip->hw_id) == VCN_HWID)
				adev->vcn.num_vcn_inst++;

			for (k = 0; k < num_base_address; k++) {
				/*
				 * Convert the endianness of base addresses in place,
				 * so that we don't need to convert them when accessing
				 * adev->reg_offset.
				 */
				ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
				DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
			}

			for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
				if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
					DRM_DEBUG("set register base offset for %s\n",
						  hw_id_names[le16_to_cpu(ip->hw_id)]);
					adev->reg_offset[hw_ip][ip->number_instance] =
						ip->base_address;
				}
			}

			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
		}
	}

	return 0;
}

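/*
 * Look up the major/minor/revision numbers of the given hw_id and instance
 * by scanning the discovery table.  Any of the output pointers may be NULL.
 * Returns -EINVAL if discovery is uninitialized or the IP was not found.
 */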
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
				    int *major, int *minor, int *revision)
{
	struct binary_header *bhdr;
	struct ip_discovery_header *ihdr;
	struct die_header *dhdr;
	struct ip *ip;
	uint16_t die_offset;
	uint16_t ip_offset;
	uint16_t num_dies;
	uint16_t num_ips;
	int i, j;

	if (!adev->mman.discovery_bin) {
		DRM_ERROR("ip discovery uninitialized\n");
		return -EINVAL;
	}

	bhdr = (struct binary_header *)adev->mman.discovery_bin;
	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
	num_dies = le16_to_cpu(ihdr->num_dies);

	for (i = 0; i < num_dies; i++) {
		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
		dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
		num_ips = le16_to_cpu(dhdr->num_ips);
		ip_offset = die_offset + sizeof(*dhdr);

		for (j = 0; j < num_ips; j++) {
			ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);

			if ((le16_to_cpu(ip->hw_id) == hw_id) &&
			    (ip->number_instance == number_instance)) {
				if (major)
					*major = ip->major;
				if (minor)
					*minor = ip->minor;
				if (revision)
					*revision = ip->revision;
				return 0;
			}
			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
		}
	}

	return -EINVAL;
}

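/* VCN versions are looked up like any other IP's. */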
int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance,
				     int *major, int *minor, int *revision)
{
	return amdgpu_discovery_get_ip_version(adev, VCN_HWID,
					       vcn_instance, major, minor, revision);
}

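/*
 * Scan the harvest table and record fully harvested IPs in
 * adev->harvest_ip_mask.  A zero hw_id terminates the list.  VCN (and with
 * it JPEG) is only masked off once every VCN instance has been harvested.
 */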
void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
	struct binary_header *bhdr;
	struct harvest_table *harvest_info;
	int i, vcn_harvest_count = 0;

	bhdr = (struct binary_header *)adev->mman.discovery_bin;
	harvest_info = (struct harvest_table *)(adev->mman.discovery_bin +
			le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset));

	for (i = 0; i < 32; i++) {
		if (le32_to_cpu(harvest_info->list[i].hw_id) == 0)
			break;

		switch (le32_to_cpu(harvest_info->list[i].hw_id)) {
		case VCN_HWID:
			vcn_harvest_count++;
			break;
		case DMU_HWID:
			adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
			break;
		default:
			break;
		}
	}
	if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
		adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
		adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
	}
}

union gc_info {
	struct gc_info_v1_0 v1;
	struct gc_info_v2_0 v2;
};

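/*
 * Fill adev->gfx.config and adev->gfx.cu_info from the GC info table.  The
 * v1 layout counts WGPs per shader array (two CUs per WGP), while v2
 * reports CU counts directly; both are translated into the same fields.
 */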
int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
	struct binary_header *bhdr;
	union gc_info *gc_info;

	if (!adev->mman.discovery_bin) {
		DRM_ERROR("ip discovery uninitialized\n");
		return -EINVAL;
	}

	bhdr = (struct binary_header *)adev->mman.discovery_bin;
	gc_info = (union gc_info *)(adev->mman.discovery_bin +
			le16_to_cpu(bhdr->table_list[GC].offset));
	switch (gc_info->v1.header.version_major) {
	case 1:
		adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
		adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
						      le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
		adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
		adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
						 le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
		adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
		break;
	case 2:
		adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
		adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
						 le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
		adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
		break;
	default:
		dev_err(adev->dev,
			"Unhandled GC info table %d.%d\n",
			gc_info->v1.header.version_major,
			gc_info->v1.header.version_minor);
		return -EINVAL;
	}
	return 0;
}