/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
23 
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <time.h>

#include "drm.h"
#include "xf86drmMode.h"
#include "xf86drm.h"
#include "amdgpu.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
40 
/* Upper bound on the number of DRM devices amdgpu_open_device() enumerates. */
#define MAX_CARDS_SUPPORTED	4
/* Capacity of the resources[] / virtual[] buffer-object tables. */
#define NUM_BUFFER_OBJECTS	1024
43 
/*
 * Build the header dword of an SDMA packet:
 *   op     - opcode, placed in bits 7:0
 *   sub_op - sub-opcode, placed in bits 15:8
 *   e      - opcode specific bits, placed in bits 31:16
 *
 * Every field is widened to uint32_t before it is shifted so that a value
 * with its top bit set can never be shifted into the sign bit of an int.
 */
#define SDMA_PACKET(op, sub_op, e)	((((uint32_t)(e) & 0xFFFF) << 16) | \
					 (((uint32_t)(sub_op) & 0xFF) << 8) | \
					 (((uint32_t)(op) & 0xFF) << 0))

#define SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR		0


/*
 * Build the header dword of a DMA packet in the layout used for SI parts:
 *   op  - opcode, placed in bits 31:28
 *   b   - single flag bit 26
 *   t   - single flag bit 23
 *   s   - single flag bit 22
 *   cnt - transfer count, placed in bits 19:0
 *
 * NOTE(review): the hardware meaning of the b/t/s flags is not visible in
 * this file; confirm against the SI DMA packet documentation before using
 * anything other than 0.
 */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((uint32_t)(op) & 0xF) << 28) | \
						 (((uint32_t)(b) & 0x1) << 26) | \
						 (((uint32_t)(t) & 0x1) << 23) | \
						 (((uint32_t)(s) & 0x1) << 22) | \
						 (((uint32_t)(cnt) & 0xFFFFF) << 0))
#define SDMA_OPCODE_COPY_SI     3
58 
59 
/**
 * Help string for command line parameters.
 *
 * Used as a printf-style format: the single %s is replaced by the program
 * name. BO ids handed out to the user start at 1 because BO 0 is the command
 * buffer that main() allocates before parsing any option.
 */
static const char usage[] =
	"Usage: %s [-?h] [-b v|g|vg size] "
	"[-c from to size count]\n"
	"where:\n"
	"\tb - Allocate a BO in VRAM, GTT or VRAM|GTT of size bytes.\n"
	"\t    This flag can be used multiple times. The first bo will\n"
	"\t    have id `1`, the second id `2`, ...\n"
	"\tc - Copy size bytes from BO `from` to BO `to`, count times\n"
	"\th - Display this help\n"
	"\n"
	"Sizes can be postfixed with k, m or g for kilo, mega and gigabyte scaling\n";

/**
 * Option string for getopt(): -? and -h take no argument, -b and -c each
 * take one argument (further positional values are pulled by next_arg()).
 */
static const char options[]   = "?hb:c:";
75 
76 /* Open AMD devices.
77  * Returns the fd of the first device it could open.
78  */
amdgpu_open_device(void)79 static int amdgpu_open_device(void)
80 {
81 	drmDevicePtr devices[MAX_CARDS_SUPPORTED];
82 	unsigned int i;
83 	int drm_count;
84 
85 	drm_count = drmGetDevices2(0, devices, MAX_CARDS_SUPPORTED);
86 	if (drm_count < 0) {
87 		fprintf(stderr, "drmGetDevices2() returned an error %d\n",
88 			drm_count);
89 		return drm_count;
90 	}
91 
92 	for (i = 0; i < drm_count; i++) {
93 		drmVersionPtr version;
94 		int fd;
95 
96 		/* If this is not PCI device, skip*/
97 		if (devices[i]->bustype != DRM_BUS_PCI)
98 			continue;
99 
100 		/* If this is not AMD GPU vender ID, skip*/
101 		if (devices[i]->deviceinfo.pci->vendor_id != 0x1002)
102 			continue;
103 
104 		if (!(devices[i]->available_nodes & 1 << DRM_NODE_RENDER))
105 			continue;
106 
107 		fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
108 
109 		/* This node is not available. */
110 		if (fd < 0) continue;
111 
112 		version = drmGetVersion(fd);
113 		if (!version) {
114 			fprintf(stderr,
115 				"Warning: Cannot get version for %s."
116 				"Error is %s\n",
117 				devices[i]->nodes[DRM_NODE_RENDER],
118 				strerror(errno));
119 			close(fd);
120 			continue;
121 		}
122 
123 		if (strcmp(version->name, "amdgpu")) {
124 			/* This is not AMDGPU driver, skip.*/
125 			drmFreeVersion(version);
126 			close(fd);
127 			continue;
128 		}
129 
130 		drmFreeVersion(version);
131 		drmFreeDevices(devices, drm_count);
132 		return fd;
133 	}
134 
135 	return -1;
136 }
137 
/* Device handle filled in by amdgpu_device_initialize() in main(). */
amdgpu_device_handle device_handle;
/* Submission context created on device_handle in main(); used by submit_ib(). */
amdgpu_context_handle context_handle;

/* Buffer objects created by alloc_bo(), indexed by BO id. */
amdgpu_bo_handle resources[NUM_BUFFER_OBJECTS];
/* GPU virtual address at which the BO with the same index is mapped. */
uint64_t virtual[NUM_BUFFER_OBJECTS];
/* Number of valid entries in resources[] and virtual[]. */
unsigned int num_buffers;
/* CPU mapping of BO 0; submit_ib() writes its command packets here. */
uint32_t *pm4;
145 
alloc_bo(uint32_t domain,uint64_t size)146 int alloc_bo(uint32_t domain, uint64_t size)
147 {
148 	struct amdgpu_bo_alloc_request request = {};
149 	amdgpu_bo_handle bo;
150 	amdgpu_va_handle va;
151 	uint64_t addr;
152 	int r;
153 
154 	if (num_buffers >= NUM_BUFFER_OBJECTS)
155 		return -ENOSPC;
156 
157 	request.alloc_size = size;
158 	request.phys_alignment = 0;
159 	request.preferred_heap = domain;
160 	request.flags = 0;
161 	r = amdgpu_bo_alloc(device_handle, &request, &bo);
162 	if (r)
163 		return r;
164 
165 	r = amdgpu_va_range_alloc(device_handle, amdgpu_gpu_va_range_general,
166 				  size, 0, 0, &addr, &va, 0);
167 	if (r)
168 		return r;
169 
170 	r = amdgpu_bo_va_op_raw(device_handle, bo, 0, size, addr,
171 				AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
172 				AMDGPU_VM_PAGE_EXECUTABLE, AMDGPU_VA_OP_MAP);
173 	if (r)
174 		return r;
175 
176 	resources[num_buffers] = bo;
177 	virtual[num_buffers] = addr;
178 	fprintf(stdout, "Allocated BO number %u at 0x%lx, domain 0x%x, size %lu\n",
179 		num_buffers++, addr, domain, size);
180 	return 0;
181 }
182 
submit_ib(uint32_t from,uint32_t to,uint64_t size,uint32_t count)183 int submit_ib(uint32_t from, uint32_t to, uint64_t size, uint32_t count)
184 {
185 	struct amdgpu_cs_request ibs_request;
186 	struct amdgpu_cs_fence fence_status;
187 	struct amdgpu_cs_ib_info ib_info;
188 	uint64_t copied = size, delta;
189 	struct timespec start, stop;
190 
191 	uint64_t src = virtual[from];
192 	uint64_t dst = virtual[to];
193 	uint32_t expired;
194 	int i, r;
195 
196 	i = 0;
197 	while (size) {
198 		uint64_t bytes = size < 0x40000 ? size : 0x40000;
199 
200 		if (device_handle->info.family_id == AMDGPU_FAMILY_SI) {
201 			pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
202 						  bytes);
203 			pm4[i++] = 0xffffffff & dst;
204 			pm4[i++] = 0xffffffff & src;
205 			pm4[i++] = (0xffffffff00000000 & dst) >> 32;
206 			pm4[i++] = (0xffffffff00000000 & src) >> 32;
207 		} else {
208 			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
209 					       SDMA_COPY_SUB_OPCODE_LINEAR,
210 					       0);
211 			if ( device_handle->info.family_id >= AMDGPU_FAMILY_AI)
212 				pm4[i++] = bytes - 1;
213 			else
214 				pm4[i++] = bytes;
215 			pm4[i++] = 0;
216 			pm4[i++] = 0xffffffff & src;
217 			pm4[i++] = (0xffffffff00000000 & src) >> 32;
218 			pm4[i++] = 0xffffffff & dst;
219 			pm4[i++] = (0xffffffff00000000 & dst) >> 32;
220 		}
221 
222 		size -= bytes;
223 		src += bytes;
224 		dst += bytes;
225 	}
226 
227 	memset(&ib_info, 0, sizeof(ib_info));
228 	ib_info.ib_mc_address = virtual[0];
229 	ib_info.size = i;
230 
231 	memset(&ibs_request, 0, sizeof(ibs_request));
232 	ibs_request.ip_type = AMDGPU_HW_IP_DMA;
233 	ibs_request.ring = 0;
234 	ibs_request.number_of_ibs = 1;
235 	ibs_request.ibs = &ib_info;
236 	ibs_request.fence_info.handle = NULL;
237 
238 	r = clock_gettime(CLOCK_MONOTONIC, &start);
239 	if (r)
240 		return errno;
241 
242 	r = amdgpu_bo_list_create(device_handle, num_buffers, resources, NULL,
243 				  &ibs_request.resources);
244 	if (r)
245 		return r;
246 
247 	for (i = 0; i < count; ++i) {
248 		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
249 		if (r)
250 			return r;
251 	}
252 
253 	r = amdgpu_bo_list_destroy(ibs_request.resources);
254 	if (r)
255 		return r;
256 
257 	memset(&fence_status, 0, sizeof(fence_status));
258 	fence_status.ip_type = ibs_request.ip_type;
259 	fence_status.ip_instance = 0;
260 	fence_status.ring = ibs_request.ring;
261 	fence_status.context = context_handle;
262 	fence_status.fence = ibs_request.seq_no;
263 	r = amdgpu_cs_query_fence_status(&fence_status,
264 					 AMDGPU_TIMEOUT_INFINITE,
265 					 0, &expired);
266 	if (r)
267 		return r;
268 
269 	r = clock_gettime(CLOCK_MONOTONIC, &stop);
270 	if (r)
271 		return errno;
272 
273 	delta = stop.tv_nsec + stop.tv_sec * 1000000000UL;
274 	delta -= start.tv_nsec + start.tv_sec * 1000000000UL;
275 
276 	fprintf(stdout, "Submitted %u IBs to copy from %u(%lx) to %u(%lx) %lu bytes took %lu usec\n",
277 		count, from, virtual[from], to, virtual[to], copied, delta / 1000);
278 	return 0;
279 }
280 
/*
 * Consume the next positional command line word as the current option
 * argument: optarg is pointed at argv[optind] and optind is advanced.
 *
 * If there is no further word, or the word starts with '-', @msg is printed
 * to stderr and the program exits with EXIT_FAILURE.
 */
void next_arg(int argc, char **argv, const char *msg)
{
	char *word = argv[optind];

	optind += 1;
	optarg = word;

	/* The bounds test comes first so a NULL word is never dereferenced. */
	if (optind <= argc && word[0] != '-')
		return;

	fprintf(stderr, "%s\n", msg);
	exit(EXIT_FAILURE);
}
289 
/*
 * Parse the size held in optarg.
 *
 * Accepts a non-negative integer in decimal, octal (leading 0) or hex
 * (leading 0x) notation, optionally followed by exactly one of k/K, m/M or
 * g/G to scale by 2^10, 2^20 or 2^30.
 *
 * Returns the size in bytes. Prints a message to stderr and exits with
 * EXIT_FAILURE if optarg is missing, negative, not a number, has trailing
 * characters after the suffix, or does not fit into 64 bits.
 */
uint64_t parse_size(void)
{
	unsigned long long value;
	uint64_t scale = 1;
	char *end;

	/* strtoull() would silently negate "-N" into a huge value. */
	if (!optarg || strchr(optarg, '-'))
		goto bad;

	errno = 0;
	value = strtoull(optarg, &end, 0);
	if (end == optarg || errno == ERANGE)
		goto bad;

	switch (*end) {
	case 'k':
	case 'K':
		scale = 1024ULL;
		end++;
		break;
	case 'm':
	case 'M':
		scale = 1024ULL * 1024;
		end++;
		break;
	case 'g':
	case 'G':
		scale = 1024ULL * 1024 * 1024;
		end++;
		break;
	default:
		break;
	}

	/* Reject trailing garbage and results that would wrap around. */
	if (*end != '\0' || value > UINT64_MAX / scale)
		goto bad;

	return value * scale;

bad:
	fprintf(stderr, "Can't parse size arg: %s\n",
		optarg ? optarg : "(null)");
	exit(EXIT_FAILURE);
}
318 
main(int argc,char ** argv)319 int main(int argc, char **argv)
320 {
321 	uint32_t major_version, minor_version;
322 	uint32_t domain, from, to, count;
323        	uint64_t size;
324 	int fd, r, c;
325 
326 	fd = amdgpu_open_device();
327        	if (fd < 0) {
328 		perror("Cannot open AMDGPU device");
329 		exit(EXIT_FAILURE);
330 	}
331 
332 	r = amdgpu_device_initialize(fd, &major_version, &minor_version, &device_handle);
333 	if (r) {
334 		fprintf(stderr, "amdgpu_device_initialize returned %d\n", r);
335 		exit(EXIT_FAILURE);
336 	}
337 
338 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
339 	if (r) {
340 		fprintf(stderr, "amdgpu_cs_ctx_create returned %d\n", r);
341 		exit(EXIT_FAILURE);
342 	}
343 
344 	if (argc == 1) {
345 		fprintf(stderr, usage, argv[0]);
346 		exit(EXIT_FAILURE);
347 	}
348 
349 	r = alloc_bo(AMDGPU_GEM_DOMAIN_GTT, 2ULL * 1024 * 1024);
350 	if (r) {
351 		fprintf(stderr, "Buffer allocation failed with %d\n", r);
352 		exit(EXIT_FAILURE);
353 	}
354 
355 	r = amdgpu_bo_cpu_map(resources[0], (void **)&pm4);
356 	if (r) {
357 		fprintf(stderr, "Buffer mapping failed with %d\n", r);
358 		exit(EXIT_FAILURE);
359 	}
360 
361 	opterr = 0;
362 	while ((c = getopt(argc, argv, options)) != -1) {
363 		switch (c) {
364 		case 'b':
365 			if (!strcmp(optarg, "v"))
366 				domain = AMDGPU_GEM_DOMAIN_VRAM;
367 			else if (!strcmp(optarg, "g"))
368 				domain = AMDGPU_GEM_DOMAIN_GTT;
369 			else if (!strcmp(optarg, "vg"))
370 				domain = AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT;
371 			else {
372 				fprintf(stderr, "Invalid domain: %s\n", optarg);
373 				exit(EXIT_FAILURE);
374 			}
375 			next_arg(argc, argv, "Missing buffer size");
376 			size = parse_size();
377 			if (size < getpagesize()) {
378 				fprintf(stderr, "Buffer size to small %lu\n", size);
379 				exit(EXIT_FAILURE);
380 			}
381 			r = alloc_bo(domain, size);
382 			if (r) {
383 				fprintf(stderr, "Buffer allocation failed with %d\n", r);
384 				exit(EXIT_FAILURE);
385 			}
386 			break;
387 		case 'c':
388 			if (sscanf(optarg, "%u", &from) != 1) {
389 				fprintf(stderr, "Can't parse from buffer: %s\n", optarg);
390 				exit(EXIT_FAILURE);
391 			}
392 			next_arg(argc, argv, "Missing to buffer");
393 			if (sscanf(optarg, "%u", &to) != 1) {
394 				fprintf(stderr, "Can't parse to buffer: %s\n", optarg);
395 				exit(EXIT_FAILURE);
396 			}
397 			next_arg(argc, argv, "Missing size");
398 			size = parse_size();
399 			next_arg(argc, argv, "Missing count");
400 			count = parse_size();
401 			r = submit_ib(from, to, size, count);
402 			if (r) {
403 				fprintf(stderr, "IB submission failed with %d\n", r);
404 				exit(EXIT_FAILURE);
405 			}
406 			break;
407 		case '?':
408 		case 'h':
409 			fprintf(stderr, usage, argv[0]);
410 			exit(EXIT_SUCCESS);
411 		default:
412 			fprintf(stderr, usage, argv[0]);
413 			exit(EXIT_FAILURE);
414 		}
415 	}
416 
417 	return EXIT_SUCCESS;
418 }
419