/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"

/* Firmware Names */
#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin"
#define FIRMWARE_ARCTURUS	"amdgpu/arcturus_vcn.bin"
#define FIRMWARE_RENOIR		"amdgpu/renoir_vcn.bin"
#define FIRMWARE_GREEN_SARDINE	"amdgpu/green_sardine_vcn.bin"
#define FIRMWARE_NAVI10		"amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14		"amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12		"amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID	"amdgpu/sienna_cichlid_vcn.bin"
#define FIRMWARE_NAVY_FLOUNDER	"amdgpu/navy_flounder_vcn.bin"
#define FIRMWARE_VANGOGH	"amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"
#define FIRMWARE_ALDEBARAN	"amdgpu/aldebaran_vcn.bin"
#define FIRMWARE_BEIGE_GOBY	"amdgpu/beige_goby_vcn.bin"
#define FIRMWARE_YELLOW_CARP	"amdgpu/yellow_carp_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

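/**
 * amdgpu_vcn_sw_init - initialize VCN software state
 *
 * @adev: amdgpu_device pointer
 *
 * Request and validate the per-ASIC VCN firmware, report its version, and
 * allocate the VCPU/stack/context buffer, the firmware-shared buffer and
 * (where supported) the DPG indirect SRAM buffer for each VCN instance.
 */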
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	int i, r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
	mutex_init(&adev->vcn.vcn_pg_lock);
	mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
	atomic_set(&adev->vcn.total_submission_cnt, 0);
	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
		atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			fw_name = FIRMWARE_RAVEN2;
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			fw_name = FIRMWARE_PICASSO;
		else
			fw_name = FIRMWARE_RAVEN;
		break;
	case CHIP_ARCTURUS:
		fw_name = FIRMWARE_ARCTURUS;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_RENOIR:
		if (adev->apu_flags & AMD_APU_IS_RENOIR)
			fw_name = FIRMWARE_RENOIR;
		else
			fw_name = FIRMWARE_GREEN_SARDINE;

		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_ALDEBARAN:
		fw_name = FIRMWARE_ALDEBARAN;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI10:
		fw_name = FIRMWARE_NAVI10;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI14:
		fw_name = FIRMWARE_NAVI14;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI12:
		fw_name = FIRMWARE_NAVI12;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_SIENNA_CICHLID:
		fw_name = FIRMWARE_SIENNA_CICHLID;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVY_FLOUNDER:
		fw_name = FIRMWARE_NAVY_FLOUNDER;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_VANGOGH:
		fw_name = FIRMWARE_VANGOGH;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_DIMGREY_CAVEFISH:
		fw_name = FIRMWARE_DIMGREY_CAVEFISH;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_BEIGE_GOBY:
		fw_name = FIRMWARE_BEIGE_GOBY;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_YELLOW_CARP:
		fw_name = FIRMWARE_YELLOW_CARP;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bits 20-23 hold the encode major version and are non-zero in the new
	 * naming convention. In the old naming convention this field is part of
	 * the minor version and DRM_DISABLED_FLAG. Since the latest old-style
	 * minor version is 0x5B and DRM_DISABLED_FLAG is zero, these four bits
	 * are always zero for old firmware, so they tell the two naming
	 * conventions apart.
	 */
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
			version_major, version_minor, family_id);
	}

	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
	bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
					    &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
			return r;
		}

		adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
		adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

		if (adev->vcn.indirect_sram) {
			r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
					&adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
			if (r) {
				dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
				return r;
			}
		}
	}

	return 0;
}

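/**
 * amdgpu_vcn_sw_fini - tear down VCN software state
 *
 * @adev: amdgpu_device pointer
 *
 * Free the per-instance DPG SRAM buffer, saved firmware copy, VCPU buffer
 * object and rings, then release the firmware and destroy the locks.
 */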
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i, j;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		if (adev->vcn.indirect_sram) {
			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
					      &adev->vcn.inst[j].dpg_sram_gpu_addr,
					      (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
		}
		kvfree(adev->vcn.inst[j].saved_bo);

		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
				      &adev->vcn.inst[j].gpu_addr,
				      (void **)&adev->vcn.inst[j].cpu_addr);

		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);

		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
	}

	release_firmware(adev->vcn.fw);
	mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
	mutex_destroy(&adev->vcn.vcn_pg_lock);

	return 0;
}

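/**
 * amdgpu_vcn_is_disabled_vcn - check whether a VCN ring type is disabled
 *
 * @adev: amdgpu_device pointer
 * @type: encode, decode or unified ring type
 * @vcn_instance: VCN instance to query
 *
 * Returns true if the IP-discovery revision field marks the given ring type
 * as disabled on this instance, or if no IP data exists for the instance.
 */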
bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
	bool ret = false;

	int major;
	int minor;
	int revision;

	/* if cannot find IP data, then this VCN does not exist */
	if (amdgpu_discovery_get_vcn_version(adev, vcn_instance, &major, &minor, &revision) != 0)
		return true;

	if ((type == VCN_ENCODE_RING) && (revision & VCN_BLOCK_ENCODE_DISABLE_MASK)) {
		ret = true;
	} else if ((type == VCN_DECODE_RING) && (revision & VCN_BLOCK_DECODE_DISABLE_MASK)) {
		ret = true;
	} else if ((type == VCN_UNIFIED_RING) && (revision & VCN_BLOCK_QUEUE_DISABLE_MASK)) {
		ret = true;
	}

	return ret;
}

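/**
 * amdgpu_vcn_suspend - save VCN state for suspend
 *
 * @adev: amdgpu_device pointer
 *
 * Cancel the idle work and copy each instance's VCPU buffer object into a
 * kernel allocation so it can be restored on resume.
 */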
int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i, idx;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return 0;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vcn.inst[i].saved_bo)
			return -ENOMEM;

		if (drm_dev_enter(&adev->ddev, &idx)) {
			memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
			drm_dev_exit(idx);
		}
	}
	return 0;
}

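/**
 * amdgpu_vcn_resume - restore VCN state on resume
 *
 * @adev: amdgpu_device pointer
 *
 * Copy the saved VCPU buffer back into VRAM, or, if nothing was saved,
 * reload the firmware image (for non-PSP loading) and clear the remainder
 * of the buffer.
 */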
int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i, idx;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return -EINVAL;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		if (adev->vcn.inst[i].saved_bo != NULL) {
			if (drm_dev_enter(&adev->ddev, &idx)) {
				memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
				drm_dev_exit(idx);
			}
			kvfree(adev->vcn.inst[i].saved_bo);
			adev->vcn.inst[i].saved_bo = NULL;
		} else {
			const struct common_firmware_header *hdr;
			unsigned offset;

			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
				if (drm_dev_enter(&adev->ddev, &idx)) {
					memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
						    le32_to_cpu(hdr->ucode_size_bytes));
					drm_dev_exit(idx);
				}
				size -= le32_to_cpu(hdr->ucode_size_bytes);
				ptr += le32_to_cpu(hdr->ucode_size_bytes);
			}
			memset_io(ptr, 0, size);
		}
	}
	return 0;
}

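/**
 * amdgpu_vcn_idle_work_handler - delayed work to power down idle VCN
 *
 * @work: pointer to the delayed work
 *
 * Count the fences still outstanding on each instance's rings and update the
 * DPG pause state accordingly. Once everything is idle, gate VCN power and
 * drop the video power profile; otherwise re-arm the work.
 */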
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
	unsigned int i, j;
	int r = 0;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
		}

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			struct dpg_pause_state new_state;

			if (fence[j] ||
			    unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

			adev->vcn.pause_dpg_mode(adev, j, &new_state);
		}

		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
		fences += fence[j];
	}

	if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
						       AMD_PG_STATE_GATE);
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						    false);
		if (r)
			dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

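/**
 * amdgpu_vcn_ring_begin_use - power up VCN before ring use
 *
 * @ring: ring about to be used
 *
 * Bump the submission count, switch to the video power profile if the idle
 * work was not pending, ungate VCN power and update the DPG pause state for
 * the ring's instance.
 */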
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	atomic_inc(&adev->vcn.total_submission_cnt);

	if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						    true);
		if (r)
			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
	}

	mutex_lock(&adev->vcn.vcn_pg_lock);
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
					       AMD_PG_STATE_UNGATE);

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		struct dpg_pause_state new_state;

		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
			atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		} else {
			unsigned int fences = 0;
			unsigned int i;

			for (i = 0; i < adev->vcn.num_enc_rings; ++i)
				fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);

			if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
		}

		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
	}
	mutex_unlock(&adev->vcn.vcn_pg_lock);
}

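/**
 * amdgpu_vcn_ring_end_use - drop the use count after ring use
 *
 * @ring: ring that was used
 *
 * Decrement the submission counters and schedule the idle work so the block
 * can be power gated once all fences have signaled.
 */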
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
	    ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);

	atomic_dec(&ring->adev->vcn.total_submission_cnt);

	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}

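/**
 * amdgpu_vcn_dec_ring_test_ring - register write/read test on the decode ring
 *
 * @ring: decode ring to test
 *
 * Write a known value to the scratch register through the ring and poll the
 * register until it reads back, confirming the ring is processed. Skipped
 * under SR-IOV, where direct register access is not available.
 */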
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* VCN in SRIOV does not support direct register read/write */
	if (amdgpu_sriov_vf(adev))
		return 0;

	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

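/**
 * amdgpu_vcn_dec_sw_ring_test_ring - test the software decode ring
 *
 * @ring: software decode ring to test
 *
 * Submit an END command and wait for the read pointer to advance, which
 * shows the firmware is consuming the ring.
 */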
int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned int i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

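/**
 * amdgpu_vcn_dec_send_msg - submit a decoder message buffer
 *
 * @ring: decode ring to submit on
 * @bo: reserved buffer object holding the message
 * @fence: optional fence returned for the submission
 *
 * Build a small IB that points the decoder at the message buffer, submit it
 * directly, then fence and free the buffer object.
 */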
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	void *msg = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	msg = amdgpu_bo_kptr(bo);
	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
	return r;
}

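/**
 * amdgpu_vcn_dec_get_create_msg - build a decoder create message
 *
 * @ring: decode ring the message is intended for
 * @handle: session handle to embed in the message
 * @bo: returns the reserved buffer object containing the message
 *
 * Allocate a reserved, kmapped buffer object and fill it with the static
 * message used by the IB test to create a session with @handle.
 */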
static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo **bo)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	*bo = NULL;
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

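/**
 * amdgpu_vcn_dec_get_destroy_msg - build a decoder destroy message
 *
 * @ring: decode ring the message is intended for
 * @handle: session handle to embed in the message
 * @bo: returns the reserved buffer object containing the message
 *
 * Allocate a reserved, kmapped buffer object and fill it with the static
 * message that tears down the session opened by the create message.
 */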
static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo **bo)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	*bo = NULL;
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

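/**
 * amdgpu_vcn_dec_ring_test_ib - IB test for the decode ring
 *
 * @ring: decode ring to test
 * @timeout: fence wait timeout
 *
 * Send a create message followed by a destroy message and wait on the fence
 * of the second submission to verify end-to-end IB execution.
 */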
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

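/**
 * amdgpu_vcn_dec_sw_send_msg - submit a message on the software decode ring
 *
 * @ring: software decode ring to submit on
 * @bo: reserved buffer object holding the message
 * @fence: optional fence returned for the submission
 *
 * Wrap the message buffer address in a decode-buffer descriptor IB, submit
 * it directly, then fence and drop the buffer object reference.
 */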
static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
				      struct amdgpu_bo *bo,
				      struct dma_fence **fence)
{
	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
	const unsigned int ib_size_dw = 64;
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->length_dw = 0;

	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
	ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
	ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
	memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));

	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
	decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
	decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}

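/**
 * amdgpu_vcn_dec_sw_ring_test_ib - IB test for the software decode ring
 *
 * @ring: software decode ring to test
 * @timeout: fence wait timeout
 *
 * Same create/destroy message exchange as the register-based decode IB test,
 * but routed through the software ring path.
 */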
int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

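/**
 * amdgpu_vcn_enc_ring_test_ring - test an encode ring
 *
 * @ring: encode ring to test
 *
 * Submit an END command and wait for the read pointer to move, confirming
 * the encoder firmware is processing the ring. Skipped under SR-IOV.
 */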
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

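/**
 * amdgpu_vcn_enc_get_create_msg - open an encoder test session
 *
 * @ring: encode ring to submit on
 * @handle: session handle to embed in the session info packet
 * @bo: buffer object whose GPU address is embedded in the session info
 * @fence: optional fence returned for the submission
 *
 * Build and directly submit a minimal "open session / initialize" encode IB.
 */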
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo *bo,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

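/**
 * amdgpu_vcn_enc_get_destroy_msg - close an encoder test session
 *
 * @ring: encode ring to submit on
 * @handle: session handle to embed in the session info packet
 * @bo: buffer object whose GPU address is embedded in the session info
 * @fence: optional fence returned for the submission
 *
 * Build and directly submit the matching "close session" encode IB.
 */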
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo *bo,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

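/**
 * amdgpu_vcn_enc_ring_test_ib - IB test for an encode ring
 *
 * @ring: encode ring to test
 * @timeout: fence wait timeout
 *
 * Allocate a 128 KiB scratch buffer object, open and close an encoder
 * session against it, and wait on the fence of the close submission.
 */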
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo = NULL;
	long r;

	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, NULL);

	return r;
}