/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

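/* Sizes of the firmware, stack and data regions mapped through the three
 * VCPU cache windows (programmed in vce_v4_0_mc_resume() and
 * vce_v4_0_sriov_start()).
 */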
#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring->me == 0)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

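/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to report in
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the FW_LOADED bit, toggling the ECPU soft reset
 * between retries. Returns 0 once the firmware reports loaded,
 * -ETIMEDOUT otherwise.
 */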
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}

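/**
 * vce_v4_0_mmsch_start - hand the init table over to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table built by vce_v4_0_sriov_start()
 *
 * Program the descriptor address, VMID and size, clear the mailbox
 * response register, then kick off the MMSCH and poll the mailbox
 * until it acknowledges (or give up with -EBUSY).
 */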
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

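/**
 * vce_v4_0_sriov_start - start VCE under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Build the VCE portion of the MMSCH v1.0 init table (ring setup,
 * MC_RESUME register writes and status polls) in the shared MM table,
 * then ask the MMSCH to program the hardware on our behalf.
 */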
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
					(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
					(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

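/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU, hold the ECPU in soft reset and clear VCE_STATUS.
 */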
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

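/**
 * vce_v4_0_early_init - set up IP block callbacks
 *
 * @handle: amdgpu_device pointer
 *
 * Pick the number of rings (one under SR-IOV, three on bare metal) and
 * install the ring and interrupt callbacks.
 */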
static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

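/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Register the interrupt source, set up the VCE common code (firmware
 * and VCPU BO, plus a saved copy for the PSP load path), initialize the
 * rings and allocate the MM table used by the MMSCH under SR-IOV.
 */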
static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;

	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only the first encoding ring is used under
			 * SR-IOV, so park the other, unused rings on an unused
			 * doorbell location.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;
	}


	r = amdgpu_vce_entity_init(adev);
	if (r)
		return r;

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

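/**
 * vce_v4_0_hw_init - start the hardware
 *
 * @handle: amdgpu_device pointer
 *
 * Start VCE either directly or through the MMSCH under SR-IOV, then
 * test all enabled rings.
 */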
static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	return 0;
}

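/**
 * vce_v4_0_suspend - suspend VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Save the VCPU BO contents when the firmware was loaded by the PSP,
 * gate clocks/power, stop the hardware and suspend the VCE common code.
 */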
static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (drm_dev_enter(&adev->ddev, &idx)) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			memcpy_fromio(adev->vce.saved_bo, ptr, size);
		}
		drm_dev_exit(idx);
	}

	/*
	 * Proper cleanups before halting the HW engine:
	 * - cancel the delayed idle work
	 * - enable powergating
	 * - enable clockgating
	 * - disable dpm
	 *
	 * TODO: to align with the VCN implementation, move the
	 * jobs for clockgating/powergating/dpm setting to
	 * ->set_powergating_state().
	 */
	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->pm.dpm_enabled) {
		amdgpu_dpm_enable_vce(adev, false);
	} else {
		amdgpu_asic_set_vce_clocks(adev, 0, 0);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_PG_STATE_GATE);
		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_CG_STATE_GATE);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

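/**
 * vce_v4_0_resume - resume VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the saved VCPU BO contents (PSP path) or reload the firmware
 * image into the VCPU BO, then re-run hardware init.
 */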
static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {

		if (drm_dev_enter(&adev->ddev, &idx)) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			memcpy_toio(ptr, adev->vce.saved_bo, size);
			drm_dev_exit(idx);
		}
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}

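/**
 * vce_v4_0_mc_resume - program memory controller related registers
 *
 * @adev: amdgpu_device pointer
 *
 * Set clock gating defaults, configure the LMI and map the firmware,
 * stack and data regions through the three VCPU cache windows.
 */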
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 10 for 2nd instance).
	 *
	 *VCE_STATUS
	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 *|----+----+-----------+----+----+----+----------+---------+----|
	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bit 3--bit 6 for the busy status check
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}


static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (state == AMD_PG_STATE_GATE)
		return vce_v4_0_stop(adev);
	else
		return vce_v4_0_start(adev);
}

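/**
 * vce_v4_0_ring_emit_ib - emit an indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve the vmid from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Emit a VCE_CMD_IB_VM packet with the vmid and the IB address and size.
 */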
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

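/**
 * vce_v4_0_ring_emit_fence - emit a fence
 *
 * @ring: amdgpu_ring pointer
 * @addr: address to write the sequence number to
 * @seq: sequence number
 * @flags: fence flags (64-bit fences are not supported)
 *
 * Emit the fence address and sequence number followed by a trap command.
 */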
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
					u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

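/**
 * vce_v4_0_emit_vm_flush - emit a VM page table flush
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid to flush
 * @pd_addr: page directory address
 *
 * Flush the TLB through the GMC helper, then wait for the page table
 * base of @vmid to land by polling the hub register.
 */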
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
			       vmid * hub->ctx_addr_distance,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
		adev->vce.ring[i].me = i;
	}
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};