1 /* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <stdio.h> 25 #include <sys/types.h> 26 #include <sys/stat.h> 27 #include <fcntl.h> 28 #include <stdarg.h> 29 #include <string.h> 30 #include <errno.h> 31 #include <unistd.h> 32 #include <stdlib.h> 33 #include <inttypes.h> 34 35 #include "drm.h" 36 #include "xf86drmMode.h" 37 #include "xf86drm.h" 38 #include "amdgpu.h" 39 #include "amdgpu_drm.h" 40 #include "amdgpu_internal.h" 41 42 #define MAX_CARDS_SUPPORTED 4 43 #define NUM_BUFFER_OBJECTS 1024 44 45 #define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 46 (((sub_op) & 0xFF) << 8) | \ 47 (((op) & 0xFF) << 0)) 48 49 #define SDMA_OPCODE_COPY 1 50 # define SDMA_COPY_SUB_OPCODE_LINEAR 0 51 52 53 #define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 54 (((b) & 0x1) << 26) | \ 55 (((t) & 0x1) << 23) | \ 56 (((s) & 0x1) << 22) | \ 57 (((cnt) & 0xFFFFF) << 0)) 58 #define SDMA_OPCODE_COPY_SI 3 59 60 61 /** Help string for command line parameters */ 62 static const char usage[] = 63 "Usage: %s [-?h] [-b v|g|vg size] " 64 "[-c from to size count]\n" 65 "where:\n" 66 " b - Allocate a BO in VRAM, GTT or VRAM|GTT of size bytes.\n" 67 " This flag can be used multiple times. The first bo will\n" 68 " have id `1`, then second id `2`, ...\n" 69 " c - Copy size bytes from BO (bo_id1) to BO (bo_id2), count times\n" 70 " h - Display this help\n" 71 "\n" 72 "Sizes can be postfixes with k, m or g for kilo, mega and gigabyte scaling\n"; 73 74 /** Specified options strings for getopt */ 75 static const char options[] = "?hb:c:"; 76 77 /* Open AMD devices. 78 * Returns the fd of the first device it could open. 79 */ 80 static int amdgpu_open_device(void) 81 { 82 drmDevicePtr devices[MAX_CARDS_SUPPORTED]; 83 unsigned int i; 84 int drm_count; 85 86 drm_count = drmGetDevices2(0, devices, MAX_CARDS_SUPPORTED); 87 if (drm_count < 0) { 88 fprintf(stderr, "drmGetDevices2() returned an error %d\n", 89 drm_count); 90 return drm_count; 91 } 92 93 for (i = 0; i < drm_count; i++) { 94 drmVersionPtr version; 95 int fd; 96 97 /* If this is not PCI device, skip*/ 98 if (devices[i]->bustype != DRM_BUS_PCI) 99 continue; 100 101 /* If this is not AMD GPU vender ID, skip*/ 102 if (devices[i]->deviceinfo.pci->vendor_id != 0x1002) 103 continue; 104 105 if (!(devices[i]->available_nodes & 1 << DRM_NODE_RENDER)) 106 continue; 107 108 fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC); 109 110 /* This node is not available. */ 111 if (fd < 0) continue; 112 113 version = drmGetVersion(fd); 114 if (!version) { 115 fprintf(stderr, 116 "Warning: Cannot get version for %s." 117 "Error is %s\n", 118 devices[i]->nodes[DRM_NODE_RENDER], 119 strerror(errno)); 120 close(fd); 121 continue; 122 } 123 124 if (strcmp(version->name, "amdgpu")) { 125 /* This is not AMDGPU driver, skip.*/ 126 drmFreeVersion(version); 127 close(fd); 128 continue; 129 } 130 131 drmFreeVersion(version); 132 drmFreeDevices(devices, drm_count); 133 return fd; 134 } 135 136 return -1; 137 } 138 139 amdgpu_device_handle device_handle; 140 amdgpu_context_handle context_handle; 141 142 amdgpu_bo_handle resources[NUM_BUFFER_OBJECTS]; 143 uint64_t virtual[NUM_BUFFER_OBJECTS]; 144 unsigned int num_buffers; 145 uint32_t *pm4; 146 147 int alloc_bo(uint32_t domain, uint64_t size) 148 { 149 struct amdgpu_bo_alloc_request request = {}; 150 amdgpu_bo_handle bo; 151 amdgpu_va_handle va; 152 uint64_t addr; 153 int r; 154 155 if (num_buffers >= NUM_BUFFER_OBJECTS) 156 return -ENOSPC; 157 158 request.alloc_size = size; 159 request.phys_alignment = 0; 160 request.preferred_heap = domain; 161 request.flags = 0; 162 r = amdgpu_bo_alloc(device_handle, &request, &bo); 163 if (r) 164 return r; 165 166 r = amdgpu_va_range_alloc(device_handle, amdgpu_gpu_va_range_general, 167 size, 0, 0, &addr, &va, 0); 168 if (r) 169 return r; 170 171 r = amdgpu_bo_va_op_raw(device_handle, bo, 0, size, addr, 172 AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | 173 AMDGPU_VM_PAGE_EXECUTABLE, AMDGPU_VA_OP_MAP); 174 if (r) 175 return r; 176 177 resources[num_buffers] = bo; 178 virtual[num_buffers] = addr; 179 fprintf(stdout, "Allocated BO number %u at 0x%" PRIx64 ", domain 0x%x, size %" PRIu64 "\n", 180 num_buffers++, addr, domain, size); 181 return 0; 182 } 183 184 int submit_ib(uint32_t from, uint32_t to, uint64_t size, uint32_t count) 185 { 186 struct amdgpu_cs_request ibs_request; 187 struct amdgpu_cs_fence fence_status; 188 struct amdgpu_cs_ib_info ib_info; 189 uint64_t copied = size, delta; 190 struct timespec start, stop; 191 192 uint64_t src = virtual[from]; 193 uint64_t dst = virtual[to]; 194 uint32_t expired; 195 int i, r; 196 197 i = 0; 198 while (size) { 199 uint64_t bytes = size < 0x40000 ? size : 0x40000; 200 201 if (device_handle->info.family_id == AMDGPU_FAMILY_SI) { 202 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 203 bytes); 204 pm4[i++] = 0xffffffff & dst; 205 pm4[i++] = 0xffffffff & src; 206 pm4[i++] = (0xffffffff00000000 & dst) >> 32; 207 pm4[i++] = (0xffffffff00000000 & src) >> 32; 208 } else { 209 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 210 SDMA_COPY_SUB_OPCODE_LINEAR, 211 0); 212 if ( device_handle->info.family_id >= AMDGPU_FAMILY_AI) 213 pm4[i++] = bytes - 1; 214 else 215 pm4[i++] = bytes; 216 pm4[i++] = 0; 217 pm4[i++] = 0xffffffff & src; 218 pm4[i++] = (0xffffffff00000000 & src) >> 32; 219 pm4[i++] = 0xffffffff & dst; 220 pm4[i++] = (0xffffffff00000000 & dst) >> 32; 221 } 222 223 size -= bytes; 224 src += bytes; 225 dst += bytes; 226 } 227 228 memset(&ib_info, 0, sizeof(ib_info)); 229 ib_info.ib_mc_address = virtual[0]; 230 ib_info.size = i; 231 232 memset(&ibs_request, 0, sizeof(ibs_request)); 233 ibs_request.ip_type = AMDGPU_HW_IP_DMA; 234 ibs_request.ring = 0; 235 ibs_request.number_of_ibs = 1; 236 ibs_request.ibs = &ib_info; 237 ibs_request.fence_info.handle = NULL; 238 239 r = clock_gettime(CLOCK_MONOTONIC, &start); 240 if (r) 241 return errno; 242 243 r = amdgpu_bo_list_create(device_handle, num_buffers, resources, NULL, 244 &ibs_request.resources); 245 if (r) 246 return r; 247 248 for (i = 0; i < count; ++i) { 249 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 250 if (r) 251 return r; 252 } 253 254 r = amdgpu_bo_list_destroy(ibs_request.resources); 255 if (r) 256 return r; 257 258 memset(&fence_status, 0, sizeof(fence_status)); 259 fence_status.ip_type = ibs_request.ip_type; 260 fence_status.ip_instance = 0; 261 fence_status.ring = ibs_request.ring; 262 fence_status.context = context_handle; 263 fence_status.fence = ibs_request.seq_no; 264 r = amdgpu_cs_query_fence_status(&fence_status, 265 AMDGPU_TIMEOUT_INFINITE, 266 0, &expired); 267 if (r) 268 return r; 269 270 r = clock_gettime(CLOCK_MONOTONIC, &stop); 271 if (r) 272 return errno; 273 274 delta = stop.tv_nsec + stop.tv_sec * 1000000000UL; 275 delta -= start.tv_nsec + start.tv_sec * 1000000000UL; 276 277 fprintf(stdout, "Submitted %u IBs to copy from %u(%" PRIx64 ") to %u(%" PRIx64 ") %" PRIu64 " bytes took %" PRIu64 " usec\n", 278 count, from, virtual[from], to, virtual[to], copied, delta / 1000); 279 return 0; 280 } 281 282 void next_arg(int argc, char **argv, const char *msg) 283 { 284 optarg = argv[optind++]; 285 if (optind > argc || optarg[0] == '-') { 286 fprintf(stderr, "%s\n", msg); 287 exit(EXIT_FAILURE); 288 } 289 } 290 291 uint64_t parse_size(void) 292 { 293 uint64_t size; 294 char ext[2]; 295 296 ext[0] = 0; 297 if (sscanf(optarg, "%" PRIi64 "%1[kmgKMG]", &size, ext) < 1) { 298 fprintf(stderr, "Can't parse size arg: %s\n", optarg); 299 exit(EXIT_FAILURE); 300 } 301 switch (ext[0]) { 302 case 'k': 303 case 'K': 304 size *= 1024; 305 break; 306 case 'm': 307 case 'M': 308 size *= 1024 * 1024; 309 break; 310 case 'g': 311 case 'G': 312 size *= 1024 * 1024 * 1024; 313 break; 314 default: 315 break; 316 } 317 return size; 318 } 319 320 int main(int argc, char **argv) 321 { 322 uint32_t major_version, minor_version; 323 uint32_t domain, from, to, count; 324 uint64_t size; 325 int fd, r, c; 326 327 fd = amdgpu_open_device(); 328 if (fd < 0) { 329 perror("Cannot open AMDGPU device"); 330 exit(EXIT_FAILURE); 331 } 332 333 r = amdgpu_device_initialize(fd, &major_version, &minor_version, &device_handle); 334 if (r) { 335 fprintf(stderr, "amdgpu_device_initialize returned %d\n", r); 336 exit(EXIT_FAILURE); 337 } 338 339 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 340 if (r) { 341 fprintf(stderr, "amdgpu_cs_ctx_create returned %d\n", r); 342 exit(EXIT_FAILURE); 343 } 344 345 if (argc == 1) { 346 fprintf(stderr, usage, argv[0]); 347 exit(EXIT_FAILURE); 348 } 349 350 r = alloc_bo(AMDGPU_GEM_DOMAIN_GTT, 2ULL * 1024 * 1024); 351 if (r) { 352 fprintf(stderr, "Buffer allocation failed with %d\n", r); 353 exit(EXIT_FAILURE); 354 } 355 356 r = amdgpu_bo_cpu_map(resources[0], (void **)&pm4); 357 if (r) { 358 fprintf(stderr, "Buffer mapping failed with %d\n", r); 359 exit(EXIT_FAILURE); 360 } 361 362 opterr = 0; 363 while ((c = getopt(argc, argv, options)) != -1) { 364 switch (c) { 365 case 'b': 366 if (!strcmp(optarg, "v")) 367 domain = AMDGPU_GEM_DOMAIN_VRAM; 368 else if (!strcmp(optarg, "g")) 369 domain = AMDGPU_GEM_DOMAIN_GTT; 370 else if (!strcmp(optarg, "vg")) 371 domain = AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT; 372 else { 373 fprintf(stderr, "Invalid domain: %s\n", optarg); 374 exit(EXIT_FAILURE); 375 } 376 next_arg(argc, argv, "Missing buffer size"); 377 size = parse_size(); 378 if (size < getpagesize()) { 379 fprintf(stderr, "Buffer size to small %" PRIu64 "\n", size); 380 exit(EXIT_FAILURE); 381 } 382 r = alloc_bo(domain, size); 383 if (r) { 384 fprintf(stderr, "Buffer allocation failed with %d\n", r); 385 exit(EXIT_FAILURE); 386 } 387 break; 388 case 'c': 389 if (sscanf(optarg, "%u", &from) != 1) { 390 fprintf(stderr, "Can't parse from buffer: %s\n", optarg); 391 exit(EXIT_FAILURE); 392 } 393 next_arg(argc, argv, "Missing to buffer"); 394 if (sscanf(optarg, "%u", &to) != 1) { 395 fprintf(stderr, "Can't parse to buffer: %s\n", optarg); 396 exit(EXIT_FAILURE); 397 } 398 next_arg(argc, argv, "Missing size"); 399 size = parse_size(); 400 next_arg(argc, argv, "Missing count"); 401 count = parse_size(); 402 r = submit_ib(from, to, size, count); 403 if (r) { 404 fprintf(stderr, "IB submission failed with %d\n", r); 405 exit(EXIT_FAILURE); 406 } 407 break; 408 case '?': 409 case 'h': 410 fprintf(stderr, usage, argv[0]); 411 exit(EXIT_SUCCESS); 412 default: 413 fprintf(stderr, usage, argv[0]); 414 exit(EXIT_FAILURE); 415 } 416 } 417 418 return EXIT_SUCCESS; 419 } 420