/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "cikd.h"
#include "cik_reg.h"
#include "radeon_kfd.h"
#include "radeon_ucode.h"
#include "cik_structs.h"

#define CIK_PIPE_PER_MEC	(4)

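/*
 * TCP address-watch register table: one row of (ADDR_H, ADDR_L, CNTL) per
 * watch point, indexed as watchRegs[id * ADDRESS_WATCH_REG_MAX + reg]
 * (see kgd_address_watch_get_offset() below).
 */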
static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
	TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL,
	TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL,
	TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL,
	TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL
};

struct kgd_mem {
	struct radeon_bo *bo;
	uint64_t gpu_addr;
	void *cpu_ptr;
};

static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
			void **mem_obj, uint64_t *gpu_addr,
			void **cpu_ptr);

static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);

static uint64_t get_vmem_size(struct kgd_dev *kgd);
static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);

static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
		uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid);

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id);

static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
				unsigned int timeout, uint32_t pipe_id,
				uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int timeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
							uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);

static const struct kfd2kgd_calls kfd2kgd = {
	.init_gtt_mem_allocation = alloc_gtt_mem,
	.free_gtt_mem = free_gtt_mem,
	.get_vmem_size = get_vmem_size,
	.get_gpu_clock_counter = get_gpu_clock_counter,
	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_pipeline = kgd_init_pipeline,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
	.write_vmid_invalidate_request = write_vmid_invalidate_request,
	.get_fw_version = get_fw_version
};

static const struct kgd2kfd_calls *kgd2kfd;

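/*
 * When amdkfd is built as a module, kgd2kfd_init is resolved at runtime via
 * symbol_request() so that radeon can load even when amdkfd is absent; when
 * amdkfd is built in, it is called directly. kgd2kfd stays NULL on failure,
 * which disables all the radeon_kfd_* entry points below.
 */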
bool radeon_kfd_init(void)
{
#if defined(CONFIG_HSA_AMD_MODULE)
	bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);

	kgd2kfd_init_p = symbol_request(kgd2kfd_init);

	if (kgd2kfd_init_p == NULL)
		return false;

	if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
		symbol_put(kgd2kfd_init);
		kgd2kfd = NULL;

		return false;
	}

	return true;
#elif defined(CONFIG_HSA_AMD)
	if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
		kgd2kfd = NULL;

		return false;
	}

	return true;
#else
	return false;
#endif
}

void radeon_kfd_fini(void)
{
	if (kgd2kfd) {
		kgd2kfd->exit();
		symbol_put(kgd2kfd_init);
	}
}

void radeon_kfd_device_probe(struct radeon_device *rdev)
{
	if (kgd2kfd)
		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
			rdev->pdev, &kfd2kgd);
}

void radeon_kfd_device_init(struct radeon_device *rdev)
{
	if (rdev->kfd) {
		struct kgd2kfd_shared_resources gpu_resources = {
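			/*
			 * Give VMIDs 8-15 (bits 8..15 of the bitmap) to
			 * amdkfd; radeon keeps VMIDs 0-7. Likewise, HW pipe 0
			 * of the first MEC is not handed over (radeon keeps
			 * it for its own compute rings), so amdkfd gets
			 * pipes 1-3.
			 */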
			.compute_vmid_bitmap = 0xFF00,

			.first_compute_pipe = 1,
			.compute_pipe_count = 4 - 1,
		};

		radeon_doorbell_get_kfd_info(rdev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		kgd2kfd->device_init(rdev->kfd, &gpu_resources);
	}
}

void radeon_kfd_device_fini(struct radeon_device *rdev)
{
	if (rdev->kfd) {
		kgd2kfd->device_exit(rdev->kfd);
		rdev->kfd = NULL;
	}
}

void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
{
	if (rdev->kfd)
		kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
}

void radeon_kfd_suspend(struct radeon_device *rdev)
{
	if (rdev->kfd)
		kgd2kfd->suspend(rdev->kfd);
}

int radeon_kfd_resume(struct radeon_device *rdev)
{
	int r = 0;

	if (rdev->kfd)
		r = kgd2kfd->resume(rdev->kfd);

	return r;
}

static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
			void **mem_obj, uint64_t *gpu_addr,
			void **cpu_ptr)
{
	struct radeon_device *rdev = (struct radeon_device *)kgd;
	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
	int r;

	BUG_ON(kgd == NULL);
	BUG_ON(gpu_addr == NULL);
	BUG_ON(cpu_ptr == NULL);

	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if ((*mem) == NULL)
		return -ENOMEM;

	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
				RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
	if (r) {
		dev_err(rdev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		goto allocate_mem_create_bo_failed;
	}

	/* map the buffer */
	r = radeon_bo_reserve((*mem)->bo, true);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT,
				&(*mem)->gpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}
	*gpu_addr = (*mem)->gpu_addr;

	r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
	if (r) {
		dev_err(rdev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}
	*cpu_ptr = (*mem)->cpu_ptr;

	radeon_bo_unreserve((*mem)->bo);

	return 0;

allocate_mem_kmap_bo_failed:
	radeon_bo_unpin((*mem)->bo);
allocate_mem_pin_bo_failed:
	radeon_bo_unreserve((*mem)->bo);
allocate_mem_reserve_bo_failed:
	radeon_bo_unref(&(*mem)->bo);
allocate_mem_create_bo_failed:
	/* free the kgd_mem wrapper as well, so the error paths do not leak */
	kfree(*mem);

	return r;
}

static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
	struct kgd_mem *mem = (struct kgd_mem *) mem_obj;

	BUG_ON(mem == NULL);

	radeon_bo_reserve(mem->bo, true);
	radeon_bo_kunmap(mem->bo);
	radeon_bo_unpin(mem->bo);
	radeon_bo_unreserve(mem->bo);
	radeon_bo_unref(&(mem->bo));
	kfree(mem);
}

static uint64_t get_vmem_size(struct kgd_dev *kgd)
{
	struct radeon_device *rdev = (struct radeon_device *)kgd;

	BUG_ON(kgd == NULL);

	return rdev->mc.real_vram_size;
}

static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
{
	struct radeon_device *rdev = (struct radeon_device *)kgd;

	return rdev->asic->get_gpu_clock_counter(rdev);
}

static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
	struct radeon_device *rdev = (struct radeon_device *)kgd;

	/* The sclk is in quanta of 10 kHz, so dividing by 100 yields MHz */
	return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
}

static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd)
{
	return (struct radeon_device *)kgd;
}

static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value)
{
	struct radeon_device *rdev = get_radeon_device(kgd);

	writel(value, (void __iomem *)(rdev->rmmio + offset));
}

static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset)
{
	struct radeon_device *rdev = get_radeon_device(kgd);

	return readl((void __iomem *)(rdev->rmmio + offset));
}

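/*
 * SRBM_GFX_CNTL selects which MEC/pipe/queue/VMID the banked CP_HQD_* and
 * SH_MEM_* registers refer to; srbm_mutex serializes users of that window.
 */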
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct radeon_device *rdev = get_radeon_device(kgd);
	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&rdev->srbm_mutex);
	write_register(kgd, SRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct radeon_device *rdev = get_radeon_device(kgd);

	write_register(kgd, SRBM_GFX_CNTL, 0);
	mutex_unlock(&rdev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
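	/*
	 * KFD pipe IDs are zero-based, but the pipes handed to amdkfd start
	 * at HW pipe 1 of MEC1 (see first_compute_pipe above), hence the
	 * pre-increment before splitting pipe_id into an MEC/pipe pair.
	 */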
	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	lock_srbm(kgd, 0, 0, 0, vmid);

	write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
	write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base);
	write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	write_register(kgd, SH_MEM_BASES, sh_mem_bases);

	unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0
	 * because a mapping is in progress or because a mapping finished and
	 * the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
					ATC_VMID_PASID_MAPPING_VALID_MASK;

	write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
			pasid_mapping);

	while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) &
								(1U << vmid)))
		cpu_relax();
	write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* Also map the vmid to the pasid for the IH block */
	write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t),
			pasid_mapping);

	return 0;
}

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
	uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);

	lock_srbm(kgd, mec, pipe, 0, 0);
	write_register(kgd, CP_HPD_EOP_BASE_ADDR,
			lower_32_bits(hpd_gpu_addr >> 8));
	write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI,
			upper_32_bits(hpd_gpu_addr >> 8));
	write_register(kgd, CP_HPD_EOP_VMID, 0);
	write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
	unlock_srbm(kgd);

	return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
	pipe = (pipe_id % CIK_PIPE_PER_MEC);

	lock_srbm(kgd, mec, pipe, 0, 0);

	write_register(kgd, CPC_INT_CNTL,
			TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE);

	unlock_srbm(kgd);

	return 0;
}

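/*
 * Per-queue SDMA RLC registers sit at a fixed stride from SDMA0's block:
 * SDMA1_REGISTER_OFFSET between the two engines and KFD_CIK_SDMA_QUEUE_OFFSET
 * between queues, so the SDMA0_RLC0_* offsets below serve every queue once
 * this base is added.
 */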
static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{
	uint32_t retval;

	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;

	pr_debug("kfd: sdma base address: 0x%x\n", retval);

	return retval;
}

static inline struct cik_mqd *get_mqd(void *mqd)
{
	return (struct cik_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
	return (struct cik_sdma_rlc_registers *)mqd;
}

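/*
 * Loading an HQD copies the memory queue descriptor (MQD) into the per-queue
 * CP registers; the target queue must first be selected via acquire_queue().
 * The write pointer is restored from the user-space shadow only when that
 * shadow is readable.
 */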
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr)
{
	uint32_t wptr_shadow, is_wptr_shadow_valid;
	struct cik_mqd *m;

	m = get_mqd(mqd);

	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);

	acquire_queue(kgd, pipe_id, queue_id);
	write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
	write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
	write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control);

	write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
	write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
	write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);

	write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
	write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
	write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);

	write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);

	write_register(kgd, CP_HQD_PERSISTENT_STATE,
			m->cp_hqd_persistent_state);
	write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
	write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type);

	write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO,
			m->cp_hqd_atomic0_preop_lo);

	write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI,
			m->cp_hqd_atomic0_preop_hi);

	write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO,
			m->cp_hqd_atomic1_preop_lo);

	write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI,
			m->cp_hqd_atomic1_preop_hi);

	write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR,
			m->cp_hqd_pq_rptr_report_addr_lo);

	write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
			m->cp_hqd_pq_rptr_report_addr_hi);

	write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);

	write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR,
			m->cp_hqd_pq_wptr_poll_addr_lo);

	write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI,
			m->cp_hqd_pq_wptr_poll_addr_hi);

	write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL,
			m->cp_hqd_pq_doorbell_control);

	write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid);

	write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum);

	write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
	write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);

	write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);

	if (is_wptr_shadow_valid)
		write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow);

	write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active);
	release_queue(kgd);

	return 0;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
{
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR,
			m->sdma_rlc_virtual_addr);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_RB_BASE,
			m->sdma_rlc_rb_base);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_RB_BASE_HI,
			m->sdma_rlc_rb_base_hi);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdma_rlc_rb_rptr_addr_lo);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdma_rlc_rb_rptr_addr_hi);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_DOORBELL,
			m->sdma_rlc_doorbell);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_RB_CNTL,
			m->sdma_rlc_rb_cntl);

	return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = read_register(kgd, CP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == read_register(kgd, CP_HQD_PQ_BASE) &&
				high == read_register(kgd, CP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	sdma_rlc_rb_cntl = read_register(kgd,
					sdma_base_addr + SDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE)
		return true;

	return false;
}

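/*
 * Destroying an HQD writes the requested reset type to
 * CP_HQD_DEQUEUE_REQUEST and then polls CP_HQD_ACTIVE in 20 ms steps until
 * the queue drains or the caller's timeout (in ms) expires.
 */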
static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
				unsigned int timeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t temp;

	acquire_queue(kgd, pipe_id, queue_id);
	write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0);

	write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type);

	while (true) {
		temp = read_register(kgd, CP_HQD_ACTIVE);
		/* the dequeue request is complete once the ACTIVE bit clears */
		if (!(temp & 0x1))
			break;
		if (timeout == 0) {
			pr_err("kfd: cp queue preemption time out\n");
			release_queue(kgd);
			return -ETIME;
		}
		msleep(20);
		timeout -= 20;
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int timeout)
{
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;
	uint32_t temp;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA_RB_ENABLE;
	write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = read_register(kgd, sdma_base_addr +
						SDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA_RLC_IDLE)
			break;
		if (timeout == 0)
			return -ETIME;
		msleep(20);
		timeout -= 20;
	}

	write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0);
	write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0);
	write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0);
	write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0);

	return 0;
}

static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
	union TCP_WATCH_CNTL_BITS cntl;
	unsigned int i;

	cntl.u32All = 0;

	cntl.bitfields.valid = 0;
	cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
	cntl.bitfields.atc = 1;

	/* Turn off all watch points */
	for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
		write_register(kgd,
				watchRegs[i * ADDRESS_WATCH_REG_MAX +
					ADDRESS_WATCH_REG_CNTL],
				cntl.u32All);

	return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	union TCP_WATCH_CNTL_BITS cntl;

	cntl.u32All = cntl_val;

	/* Turn off this watch point until all of its registers are set */
	cntl.bitfields.valid = 0;
	write_register(kgd,
			watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
				ADDRESS_WATCH_REG_CNTL],
			cntl.u32All);

	write_register(kgd,
			watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
				ADDRESS_WATCH_REG_ADDR_HI],
			addr_hi);

	write_register(kgd,
			watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
				ADDRESS_WATCH_REG_ADDR_LO],
			addr_lo);

	/* Re-enable the watch point */
	cntl.bitfields.valid = 1;

	write_register(kgd,
			watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
				ADDRESS_WATCH_REG_CNTL],
			cntl.u32All);

	return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct radeon_device *rdev = get_radeon_device(kgd);
	uint32_t data;

	mutex_lock(&rdev->grbm_idx_mutex);

	write_register(kgd, GRBM_GFX_INDEX, gfx_index_val);
	write_register(kgd, SQ_CMD, sq_cmd);

	/* Restore the GRBM_GFX_INDEX register to broadcast mode */
	data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
		SE_BROADCAST_WRITES;

	write_register(kgd, GRBM_GFX_INDEX, data);

	mutex_unlock(&rdev->grbm_idx_mutex);

	return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
{
	uint32_t reg;
	struct radeon_device *rdev = (struct radeon_device *) kgd;

	reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
	return reg & ATC_VMID_PASID_MAPPING_VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct radeon_device *rdev = (struct radeon_device *) kgd;

	reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
	return reg & ATC_VMID_PASID_MAPPING_PASID_MASK;
}

static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
	struct radeon_device *rdev = (struct radeon_device *) kgd;

	WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
}

static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
	struct radeon_device *rdev = (struct radeon_device *) kgd;
	const union radeon_firmware_header *hdr;

	BUG_ON(kgd == NULL || rdev->mec_fw == NULL);

	switch (type) {
	case KGD_ENGINE_PFP:
		hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data;
		break;

	case KGD_ENGINE_ME:
		hdr = (const union radeon_firmware_header *) rdev->me_fw->data;
		break;

	case KGD_ENGINE_CE:
		hdr = (const union radeon_firmware_header *) rdev->ce_fw->data;
		break;

	case KGD_ENGINE_MEC1:
		hdr = (const union radeon_firmware_header *) rdev->mec_fw->data;
		break;

	case KGD_ENGINE_MEC2:
		hdr = (const union radeon_firmware_header *)
							rdev->mec2_fw->data;
		break;

	case KGD_ENGINE_RLC:
		hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data;
		break;

	case KGD_ENGINE_SDMA1:
	case KGD_ENGINE_SDMA2:
		hdr = (const union radeon_firmware_header *)
							rdev->sdma_fw->data;
		break;

	default:
		return 0;
	}

	if (hdr == NULL)
		return 0;

	/* Only 12 bits are in use */
	return hdr->common.ucode_version;
}