• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/types.h>
25 #include <linux/kernel.h>
26 #include <linux/log2.h>
27 #include <linux/sched.h>
28 #include <linux/slab.h>
29 #include <linux/mutex.h>
30 #include <linux/device.h>
31 
32 #include "kfd_pm4_headers.h"
33 #include "kfd_pm4_headers_diq.h"
34 #include "kfd_kernel_queue.h"
35 #include "kfd_priv.h"
36 #include "kfd_pm4_opcodes.h"
37 #include "cik_regs.h"
38 #include "kfd_dbgmgr.h"
39 #include "kfd_dbgdev.h"
40 #include "kfd_device_queue_manager.h"
41 #include "../../radeon/cik_reg.h"
42 
/* Disarm all hardware address-watch points directly through kgd (no DIQ). */
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	BUG_ON(!dev || !dev->kfd2kgd);

	dev->kfd2kgd->address_watch_disable(dev->kgd);
}
49 
/*
 * Submit a caller-built indirect buffer (IB) of PM4 packets through the
 * Debug Interface Queue (DIQ) and wait for the CP to finish executing it.
 *
 * Two packets are placed on the DIQ itself:
 *  - INDIRECT_BUFFER_PASID, pointing at @packet_buff (GPU VA
 *    @vmid0_address, @size_in_bytes long) and carrying the target @pasid;
 *  - RELEASE_MEM, used as a GPU->CPU fence: the CP writes
 *    QUEUESTATE__ACTIVE into a freshly allocated GART qword once the IB
 *    (and the preceding cache flush event) has completed.
 *
 * Returns 0 on success, otherwise the error from packet-buffer
 * acquisition, GART allocation, or the fence-wait timeout.
 */
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);

	kq = dbgdev->kq;

	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ.
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status != 0) {
		pr_err("amdkfd: acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	/* Split the 64-bit GPU VA of the IB into the lo/hi packet fields. */
	largep = (union ULARGE_INTEGER *) &vmid0_address;

	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	/*
	 * IB size in dwords, plus control bits 23 and 31 — presumably
	 * valid/privilege style flags per the PM4 INDIRECT_BUFFER spec;
	 * TODO confirm against the packet documentation.
	 */
	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / sizeof(uint32_t)) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * for now we use release mem for GPU-CPU synchronization
	 * Consider WaitRegMem + WriteData as a better alternative
	 * we get a GART allocation (gpu/cpu mapping)
	 * for the sync variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		/* Give back the acquired-but-unsubmitted DIQ packet space. */
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) /
					sizeof(unsigned int) - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	/* GPU address of the sync qword, split into lo (dword-aligned) / hi. */
	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	/* Value the CP will write to *rm_state when the IB has completed. */
	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
168 
dbgdev_register_nodiq(struct kfd_dbgdev * dbgdev)169 static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
170 {
171 	BUG_ON(!dbgdev);
172 
173 	/*
174 	 * no action is needed in this case,
175 	 * just make sure diq will not be used
176 	 */
177 
178 	dbgdev->kq = NULL;
179 
180 	return 0;
181 }
182 
dbgdev_register_diq(struct kfd_dbgdev * dbgdev)183 static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
184 {
185 	struct queue_properties properties;
186 	unsigned int qid;
187 	struct kernel_queue *kq = NULL;
188 	int status;
189 
190 	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
191 
192 	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
193 				&properties, 0, KFD_QUEUE_TYPE_DIQ,
194 				&qid);
195 
196 	if (status) {
197 		pr_err("amdkfd: Failed to create DIQ\n");
198 		return status;
199 	}
200 
201 	pr_debug("DIQ Created with queue id: %d\n", qid);
202 
203 	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
204 
205 	if (kq == NULL) {
206 		pr_err("amdkfd: Error getting DIQ\n");
207 		pqm_destroy_queue(dbgdev->pqm, qid);
208 		return -EFAULT;
209 	}
210 
211 	dbgdev->kq = kq;
212 
213 	return status;
214 }
215 
dbgdev_unregister_nodiq(struct kfd_dbgdev * dbgdev)216 static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
217 {
218 	BUG_ON(!dbgdev || !dbgdev->dev);
219 
220 	/* disable watch address */
221 	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
222 	return 0;
223 }
224 
dbgdev_unregister_diq(struct kfd_dbgdev * dbgdev)225 static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
226 {
227 	/* todo - disable address watch */
228 	int status;
229 
230 	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
231 
232 	status = pqm_destroy_queue(dbgdev->pqm,
233 			dbgdev->kq->queue->properties.queue_id);
234 	dbgdev->kq = NULL;
235 
236 	return status;
237 }
238 
/*
 * Translate watch point @index of @adw_info into the three TCP_WATCH
 * register images (address hi/lo, control) for the given @vmid.
 * Pure register-image computation; no hardware access happens here.
 */
static void dbgdev_address_watch_set_registers(
			const struct dbg_address_watch_info *adw_info,
			union TCP_WATCH_ADDR_H_BITS *addrHi,
			union TCP_WATCH_ADDR_L_BITS *addrLo,
			union TCP_WATCH_CNTL_BITS *cntl,
			unsigned int index, unsigned int vmid)
{
	union ULARGE_INTEGER addr;

	BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);

	addr.quad_part = 0;
	addrHi->u32All = 0;
	addrLo->u32All = 0;
	cntl->u32All = 0;

	/* No per-point mask array supplied -> fall back to the default mask. */
	if (adw_info->watch_mask != NULL)
		cntl->bitfields.mask =
			(uint32_t) (adw_info->watch_mask[index] &
					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
	else
		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;

	addr.quad_part = (unsigned long long) adw_info->watch_address[index];

	/* Split the 64-bit watch address into the hi/lo register fields. */
	addrHi->bitfields.addr = addr.u.high_part &
					ADDRESS_WATCH_REG_ADDHIGH_MASK;
	addrLo->bitfields.addr =
			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);

	cntl->bitfields.mode = adw_info->watch_mode[index];
	cntl->bitfields.vmid = (uint32_t) vmid;
	/* for now assume it is an ATC address */
	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;

	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
	pr_debug("\t\t%20s %08x\n", "set reg add high :",
			addrHi->bitfields.addr);
	pr_debug("\t\t%20s %08x\n", "set reg add low :",
			addrLo->bitfields.addr);
}
280 
dbgdev_address_watch_nodiq(struct kfd_dbgdev * dbgdev,struct dbg_address_watch_info * adw_info)281 static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
282 					struct dbg_address_watch_info *adw_info)
283 {
284 	union TCP_WATCH_ADDR_H_BITS addrHi;
285 	union TCP_WATCH_ADDR_L_BITS addrLo;
286 	union TCP_WATCH_CNTL_BITS cntl;
287 	struct kfd_process_device *pdd;
288 	unsigned int i;
289 
290 	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
291 
292 	/* taking the vmid for that process on the safe way using pdd */
293 	pdd = kfd_get_process_device_data(dbgdev->dev,
294 					adw_info->process);
295 	if (!pdd) {
296 		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
297 		return -EFAULT;
298 	}
299 
300 	addrHi.u32All = 0;
301 	addrLo.u32All = 0;
302 	cntl.u32All = 0;
303 
304 	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
305 			(adw_info->num_watch_points == 0)) {
306 		pr_err("amdkfd: num_watch_points is invalid\n");
307 		return -EINVAL;
308 	}
309 
310 	if ((adw_info->watch_mode == NULL) ||
311 		(adw_info->watch_address == NULL)) {
312 		pr_err("amdkfd: adw_info fields are not valid\n");
313 		return -EINVAL;
314 	}
315 
316 	for (i = 0 ; i < adw_info->num_watch_points ; i++) {
317 		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
318 						&cntl, i, pdd->qpd.vmid);
319 
320 		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
321 		pr_debug("\t\t%20s %08x\n", "register index :", i);
322 		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
323 		pr_debug("\t\t%20s %08x\n", "Address Low is :",
324 				addrLo.bitfields.addr);
325 		pr_debug("\t\t%20s %08x\n", "Address high is :",
326 				addrHi.bitfields.addr);
327 		pr_debug("\t\t%20s %08x\n", "Address high is :",
328 				addrHi.bitfields.addr);
329 		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
330 				cntl.bitfields.mask);
331 		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
332 				cntl.bitfields.mode);
333 		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
334 				cntl.bitfields.vmid);
335 		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
336 				cntl.bitfields.atc);
337 		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
338 
339 		pdd->dev->kfd2kgd->address_watch_execute(
340 						dbgdev->dev->kgd,
341 						i,
342 						cntl.u32All,
343 						addrHi.u32All,
344 						addrLo.u32All);
345 	}
346 
347 	return 0;
348 }
349 
/*
 * Program address-watch registers through the DIQ.
 *
 * For each watch point a 4-packet SET_CONFIG_REG indirect buffer is
 * built and submitted:
 *   [0] TCP_WATCH..._CNTL  - written first with the 'valid' bit still
 *       clear (cntl starts zeroed), with the CP inserting the VMID;
 *   [1] TCP_WATCH..._ADDR_HI
 *   [2] TCP_WATCH..._ADDR_LO
 *   [3] TCP_WATCH..._CNTL  - rewritten with 'valid' set (when the watch
 *       address is non-zero), arming the watch point, VMID inserted.
 *
 * Returns 0 on success, -EINVAL on bad adw_info contents, or the error
 * from GART allocation / IB submission.
 */
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the vmid in DIQ mode, just a place holder */
	unsigned int vmid = 0;

	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("amdkfd: num_watch_points is invalid\n");
		return -EINVAL;
	}

	if ((NULL == adw_info->watch_mode) ||
			(NULL == adw_info->watch_address)) {
		pr_err("amdkfd: adw_info fields are not valid\n");
		return -EINVAL;
	}

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	/*
	 * Common packet headers: packets 0 and 3 (CNTL writes) have the CP
	 * insert the queue's VMID at ADDRESS_WATCH_CNTL_OFFSET; the address
	 * writes (1, 2) do not.
	 */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add     is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		/* Byte offset of this watch point's CNTL register -> dword. */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		aw_reg_add_dword /= sizeof(uint32_t);

		packets_vec[0].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		aw_reg_add_dword /= sizeof(uint32_t);

		packets_vec[1].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		aw_reg_add_dword /= sizeof(uint32_t);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* enable watch flag if address is not zero*/
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		aw_reg_add_dword /= sizeof(uint32_t);

		/* Final CNTL write arms (or disarms) the watch point. */
		packets_vec[3].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status != 0) {
			pr_err("amdkfd: Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}
510 
/*
 * Compose the SQ_CMD and GRBM_GFX_INDEX register images for the
 * wave-control request described by @wac_info.
 *
 * Pure computation, no hardware access. The output registers are only
 * written when the whole request validates (returns 0); on -EINVAL
 * (unknown mode/operand or out-of-range trap id) the outputs are left
 * untouched.
 */
static int dbgdev_wave_control_set_registers(
				struct dbg_wave_control_info *wac_info,
				union SQ_CMD_BITS *in_reg_sq_cmd,
				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
{
	int status = 0;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct HsaDbgWaveMsgAMDGen2 *pMsg;

	BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;
	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;

	switch (wac_info->mode) {
	/* Send command to single wave */
	case HSA_DBG_WAVEMODE_SINGLE:
		/*
		 * Limit access to the process waves only,
		 * by setting vmid check
		 */
		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;

		/* Target exactly one SE/SH/CU via GRBM_GFX_INDEX. */
		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;

		break;

	/* Send command to all waves with matching VMID */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:

		reg_gfx_index.bits.sh_broadcast_writes = 1;
		reg_gfx_index.bits.se_broadcast_writes = 1;
		reg_gfx_index.bits.instance_broadcast_writes = 1;

		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		break;

	/* Send command to all CU waves with matching VMID */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:

		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;

		break;

	default:
		return -EINVAL;
	}

	/* Map the debugger operand onto the SQ indirect command code. */
	switch (wac_info->operand) {
	case HSA_DBG_WAVEOP_HALT:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
		break;

	case HSA_DBG_WAVEOP_RESUME:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
		break;

	case HSA_DBG_WAVEOP_KILL:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
		break;

	case HSA_DBG_WAVEOP_DEBUG:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
		break;

	case HSA_DBG_WAVEOP_TRAP:
		/* Trap ids above the hardware maximum are rejected. */
		if (wac_info->trapId < MAX_TRAPID) {
			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
			reg_sq_cmd.bits.trap_id = wac_info->trapId;
		} else {
			status = -EINVAL;
		}
		break;

	default:
		status = -EINVAL;
		break;
	}

	if (status == 0) {
		*in_reg_sq_cmd = reg_sq_cmd;
		*in_reg_gfx_index = reg_gfx_index;
	}

	return status;
}
610 
/*
 * Execute a wave-control command through the DIQ.
 *
 * Builds a three-packet indirect buffer:
 *   [0] SET_UCONFIG_REG GRBM_GFX_INDEX - select the SE/SH/CU targeting
 *       computed by dbgdev_wave_control_set_registers();
 *   [1] SET_CONFIG_REG  SQ_CMD        - issue the command, with the CP
 *       inserting the queue's VMID (we cannot know it in DIQ mode);
 *   [2] SET_UCONFIG_REG GRBM_GFX_INDEX - restore full broadcast.
 *
 * Returns 0 on success, or the error from register composition, GART
 * allocation, or IB submission.
 */
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{

	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	BUG_ON(!dbgdev || !wac_info);

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("amdkfd: Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: N/A\n");

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
						AMD_CONFIG_REG_BASE;

	/* CP fills in the real VMID at SQ_CMD_VMID_OFFSET on execution. */
	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */

	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;


	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
				GRBM_GFX_INDEX / (sizeof(uint32_t)) -
					USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status != 0)
		pr_err("amdkfd: Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
729 
/*
 * Execute a wave-control command directly through kgd (no DIQ).
 *
 * Unlike the DIQ path, we know the process' VMID here (via its pdd), so
 * it is patched into SQ_CMD before handing both register images to
 * wave_control_execute().
 *
 * Returns 0 / the kgd result on success, -EFAULT if no pdd exists for
 * the process, or the error from register composition.
 */
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("amdkfd: Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
794 
dbgdev_wave_reset_wavefronts(struct kfd_dev * dev,struct kfd_process * p)795 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
796 {
797 	int status = 0;
798 	unsigned int vmid;
799 	union SQ_CMD_BITS reg_sq_cmd;
800 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
801 	struct kfd_process_device *pdd;
802 	struct dbg_wave_control_info wac_info;
803 	int temp;
804 	int first_vmid_to_scan = 8;
805 	int last_vmid_to_scan = 15;
806 
807 	first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
808 	temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
809 	last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
810 
811 	reg_sq_cmd.u32All = 0;
812 	status = 0;
813 
814 	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
815 	wac_info.operand = HSA_DBG_WAVEOP_KILL;
816 
817 	pr_debug("Killing all process wavefronts\n");
818 
819 	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
820 	 * ATC_VMID15_PASID_MAPPING
821 	 * to check which VMID the current process is mapped to. */
822 
823 	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
824 		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
825 				(dev->kgd, vmid)) {
826 			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
827 					(dev->kgd, vmid) == p->pasid) {
828 				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
829 						vmid, p->pasid);
830 				break;
831 			}
832 		}
833 	}
834 
835 	if (vmid > last_vmid_to_scan) {
836 		pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
837 		return -EFAULT;
838 	}
839 
840 	/* taking the VMID for that process on the safe way using PDD */
841 	pdd = kfd_get_process_device_data(dev, p);
842 	if (!pdd)
843 		return -EFAULT;
844 
845 	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
846 			&reg_gfx_index);
847 	if (status != 0)
848 		return -EINVAL;
849 
850 	/* for non DIQ we need to patch the VMID: */
851 	reg_sq_cmd.bits.vm_id = vmid;
852 
853 	dev->kfd2kgd->wave_control_execute(dev->kgd,
854 					reg_gfx_index.u32All,
855 					reg_sq_cmd.u32All);
856 
857 	return 0;
858 }
859 
kfd_dbgdev_init(struct kfd_dbgdev * pdbgdev,struct kfd_dev * pdev,enum DBGDEV_TYPE type)860 void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
861 			enum DBGDEV_TYPE type)
862 {
863 	BUG_ON(!pdbgdev || !pdev);
864 
865 	pdbgdev->dev = pdev;
866 	pdbgdev->kq = NULL;
867 	pdbgdev->type = type;
868 	pdbgdev->pqm = NULL;
869 
870 	switch (type) {
871 	case DBGDEV_TYPE_NODIQ:
872 		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
873 		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
874 		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
875 		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
876 		break;
877 	case DBGDEV_TYPE_DIQ:
878 	default:
879 		pdbgdev->dbgdev_register = dbgdev_register_diq;
880 		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
881 		pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
882 		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
883 		break;
884 	}
885 
886 }
887