1 /*
2 * Copyright © 2024 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <sys/mman.h>
10
11 #include <lua.h>
12 #include <lualib.h>
13 #include <lauxlib.h>
14
15 #include "util/ralloc.h"
16
17 #include <xf86drm.h>
18 #include "drm-uapi/i915_drm.h"
19 #include "drm-uapi/xe_drm.h"
20
21 #include "intel/compiler/brw_asm.h"
22 #include "intel/compiler/brw_isa_info.h"
23 #include "intel/common/intel_gem.h"
24 #include "intel/common/xe/intel_engine.h"
25 #include "intel/decoder/intel_decoder.h"
26 #include "intel/dev/intel_debug.h"
27
28 #include "executor.h"
29
/* Fixed graphics virtual addresses and a common size for the three BOs the
 * executor works with (batch, extra/state, data).
 */
enum {
   /* Predictable base addresses here make it easier to spot errors. */
   EXECUTOR_BO_BATCH_ADDR = 0x10000000,
   EXECUTOR_BO_EXTRA_ADDR = 0x20000000,
   EXECUTOR_BO_DATA_ADDR = 0x30000000,

   /* Apply to all BOs. */
   EXECUTOR_BO_SIZE = 10 * 1024 * 1024,
};
39
40 static void
print_help()41 print_help()
42 {
43 printf(
44 "Executes shaders written for Intel GPUs\n"
45 "usage: executor FILENAME\n"
46 "\n"
47 "The input is a Lua script that can perform data manipulation\n"
48 "and dispatch execution of compute shaders, written in Xe assembly,\n"
49 "the same format used by the brw_asm assembler or when dumping\n"
50 "shaders in debug mode.\n"
51 "\n"
52 "The goal is to have a tool to experiment directly with certain\n"
53 "assembly instructions and the shared units without having to\n"
54 "instrument the drivers.\n"
55 "\n"
56 "EXECUTION CONTEXT\n"
57 "\n"
58 "By default compute shaders are used with SIMD8 for Gfx9-125 and SIMD16\n"
59 "for Xe2. Only a single thread is dispatched. A data buffer is used to\n"
60 "pipe data into the shader and out of it, it is bound to the graphics\n"
61 "address 0x%08x.\n"
62 "\n"
63 "The Gfx versions have differences in their assembly and shared units, so\n"
64 "other than very simple examples, scripts for this program will be either\n"
65 "specific to a version or provide shader variants for multiple versions.\n"
66 "\n"
67 "ASSEMBLY MACROS\n"
68 "\n"
69 "In addition to regular instructions, the follow macros will generate\n"
70 "assembly code based on the Gfx version being executed. Unlike in regular\n"
71 "instructions, REGs don't use regions and can't be immediates.\n"
72 "\n"
73 "- @eot\n"
74 " Send an EOT message.\n"
75 "\n"
76 "- @mov REG IMM\n"
77 " Like a regular MOV but accepts numbers in both decimal and\n"
78 " floating-point.\n"
79 "\n"
80 "- @id REG\n"
81 " Write a local invocation index into REG.\n"
82 "\n"
83 "- @read DST_REG OFFSET_REG\n"
84 " Read 32-bit values from the memory buffer at OFFSET_REG into DST_REG.\n"
85 "\n"
86 "- @write OFFSET_REG SRC_REG\n"
87 " Write 32-bit values from SRC_REG to the memory buffer at OFFSET_REG.\n"
88 "\n"
89 "- @syncnop\n"
90 " Produce a coarse grained sync.nop (when applicable) to ensure data from\n"
91 " macros above are read/written.\n"
92 "\n"
93 "LUA ENVIRONMENT\n"
94 "\n"
95 "In addition to the regular Lua standard library the following variables and.\n"
96 "functions are available.\n"
97 "\n"
98 "- execute({src=STR, data=ARRAY}) -> ARRAY\n"
99 " Takes a table as argument. The 'src' in the table contains the shader to be\n"
100 " executed. The 'data' argument will be used to fill the data buffer with 32-bit\n"
101 " values. The function returns an ARRAY with the contents of the data buffer\n"
102 " after the shader completes.\n"
103 "\n"
104 "- dump(ARRAY, COUNT)\n"
105 " Pretty print the COUNT first elements of an array of 32-bit values.\n"
106 "\n"
107 "- check_ver(V, ...), check_verx10(V, ...)\n"
108 " Exit if the Gfx version being executed isn't in the arguments list.\n"
109 "\n"
110 "- ver, verx10\n"
111 " Variables containing the Gfx version being executed.\n"
112 "\n"
113 "This program was compiled with %s.\n"
114 "\n"
115 "ENVIRONMENT VARIABLES\n"
116 "\n"
117 "The following INTEL_DEBUG values (comma separated) are used:\n"
118 "\n"
119 " - bat Dumps the batch buffer.\n"
120 " - color Uses colors for the batch buffer dump.\n"
121 " - cs Dumps the source after macro processing\n"
122 " the final assembly.\n"
123 "\n"
124 "EXAMPLE\n"
125 "\n"
126 "The following script\n"
127 "\n"
128 " local r = execute {\n"
129 " data={ [42] = 0x100 },\n"
130 " src=[[\n"
131 " @mov g1 42\n"
132 " @read g2 g1\n"
133 "\n"
134 " @id g3\n"
135 "\n"
136 " add(8) g4<1>UD g2<8,8,1>UD g3<8,8,1>UD { align1 @1 1Q };\n"
137 "\n"
138 " @write g3 g4\n"
139 " @eot\n"
140 " ]]\n"
141 " }\n"
142 "\n"
143 " dump(r, 4)\n"
144 "\n"
145 "Will produce the following output\n"
146 "\n"
147 " [0x00000000] 0x00000100 0x00000101 0x00000102 0x00000103\n"
148 "\n"
149 "More examples can be found in the examples/ directory in the source code.\n"
150 "\n", EXECUTOR_BO_DATA_ADDR, LUA_RELEASE);
151 }
152
/* Process-wide state shared by main() and the Lua callbacks: the selected
 * device's info, the ISL device, the decoded ISA description, and the DRM fd.
 */
static struct {
   struct intel_device_info devinfo;
   struct isl_device isl_dev;
   struct brw_isa_info isa;
   int fd;
} E;
159
/* Dispatch a per-generation function: expands to a call of gfx9_func,
 * gfx11_func, etc. based on the running device's verx10 value.
 */
#define genX_call(func, ...)                                    \
   switch (E.devinfo.verx10) {                                  \
   case  90: gfx9_  ##func(__VA_ARGS__); break;                 \
   case 110: gfx11_ ##func(__VA_ARGS__); break;                 \
   case 120: gfx12_ ##func(__VA_ARGS__); break;                 \
   case 125: gfx125_##func(__VA_ARGS__); break;                 \
   case 200: gfx20_ ##func(__VA_ARGS__); break;                 \
   case 300: gfx30_ ##func(__VA_ARGS__); break;                 \
   default: unreachable("Unsupported hardware generation");     \
   }
170
171 static void
executor_create_bo(executor_context * ec,executor_bo * bo,uint64_t addr,uint32_t size_in_bytes)172 executor_create_bo(executor_context *ec, executor_bo *bo, uint64_t addr, uint32_t size_in_bytes)
173 {
174 if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
175 struct drm_i915_gem_create gem_create = {
176 .size = size_in_bytes,
177 };
178
179 int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
180 if (err)
181 failf("i915_gem_create");
182
183 struct drm_i915_gem_mmap_offset mm = {
184 .handle = gem_create.handle,
185 .flags = ec->devinfo->has_local_mem ? I915_MMAP_OFFSET_FIXED
186 : I915_MMAP_OFFSET_WC,
187 };
188
189 err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mm);
190 if (err)
191 failf("i915_gem_mmap_offset");
192
193 bo->handle = gem_create.handle;
194 bo->map = mmap(NULL, size_in_bytes, PROT_READ | PROT_WRITE,
195 MAP_SHARED, ec->fd, mm.offset);
196 if (!bo->map)
197 failf("mmap");
198 } else {
199 assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
200
201 struct drm_xe_gem_create gem_create = {
202 .size = size_in_bytes,
203 .cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
204 .placement = 1u << ec->devinfo->mem.sram.mem.instance,
205 };
206
207 int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
208 if (err)
209 failf("xe_gem_create");
210
211 struct drm_xe_gem_mmap_offset mm = {
212 .handle = gem_create.handle,
213 };
214
215 err = intel_ioctl(ec->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
216 if (err)
217 failf("xe_gem_mmap_offset");
218
219 bo->handle = gem_create.handle;
220 bo->map = mmap(NULL, size_in_bytes, PROT_READ | PROT_WRITE,
221 MAP_SHARED, ec->fd, mm.offset);
222 if (!bo->map)
223 failf("mmap");
224 }
225
226 bo->size = size_in_bytes;
227 bo->addr = addr;
228 bo->cursor = bo->map;
229 }
230
231 static void
executor_destroy_bo(executor_context * ec,executor_bo * bo)232 executor_destroy_bo(executor_context *ec, executor_bo *bo)
233 {
234 struct drm_gem_close gem_close = {
235 .handle = bo->handle,
236 };
237
238 int err = munmap(bo->map, bo->size);
239 if (err)
240 failf("munmap");
241
242 err = intel_ioctl(ec->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
243 if (err)
244 failf("gem_close");
245
246 memset(bo, 0, sizeof(*bo));
247 }
248
249 static void
executor_print_bo(executor_bo * bo,const char * name)250 executor_print_bo(executor_bo *bo, const char *name)
251 {
252 assert((bo->cursor - bo->map) % 4 == 0);
253 uint32_t *dw = bo->map;
254 uint32_t len = (uint32_t *)bo->cursor - dw;
255
256 printf("=== %s (0x%08"PRIx64", %td bytes) ===\n", name, bo->addr, bo->cursor - bo->map);
257
258 for (int i = 0; i < len; i++) {
259 if ((i % 8) == 0) printf("[0x%08x] ", (i*4) + (uint32_t)bo->addr);
260 printf("0x%08x ", dw[i]);
261 if ((i % 8) == 7) printf("\n");
262 }
263 printf("\n");
264 }
265
266 void *
executor_alloc_bytes(executor_bo * bo,uint32_t size)267 executor_alloc_bytes(executor_bo *bo, uint32_t size)
268 {
269 return executor_alloc_bytes_aligned(bo, size, 0);
270 }
271
272 void *
executor_alloc_bytes_aligned(executor_bo * bo,uint32_t size,uint32_t alignment)273 executor_alloc_bytes_aligned(executor_bo *bo, uint32_t size, uint32_t alignment)
274 {
275 void *r = bo->cursor;
276 if (alignment) {
277 r = (void *)(((uintptr_t)r + alignment-1) & ~((uintptr_t)alignment-1));
278 }
279 bo->cursor = r + size;
280 return r;
281 }
282
283 executor_address
executor_address_of_ptr(executor_bo * bo,void * ptr)284 executor_address_of_ptr(executor_bo *bo, void *ptr)
285 {
286 return (executor_address){ptr - bo->map + bo->addr};
287 }
288
289 static int
get_drm_device(struct intel_device_info * devinfo)290 get_drm_device(struct intel_device_info *devinfo)
291 {
292 drmDevicePtr devices[8];
293 int max_devices = drmGetDevices2(0, devices, 8);
294
295 int i, fd = -1;
296 for (i = 0; i < max_devices; i++) {
297 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
298 devices[i]->bustype == DRM_BUS_PCI &&
299 devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
300 fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
301 if (fd < 0)
302 continue;
303
304 if (!intel_get_device_info_from_fd(fd, devinfo, -1, -1) ||
305 devinfo->ver < 8) {
306 close(fd);
307 fd = -1;
308 continue;
309 }
310
311 /* Found a device! */
312 break;
313 }
314 }
315 drmFreeDevices(devices, max_devices);
316
317 return fd;
318 }
319
320 static struct intel_batch_decode_bo
decode_get_bo(void * _ec,bool ppgtt,uint64_t address)321 decode_get_bo(void *_ec, bool ppgtt, uint64_t address)
322 {
323 executor_context *ec = _ec;
324 struct intel_batch_decode_bo bo = {0};
325
326 if (address >= ec->bo.batch.addr && address < ec->bo.batch.addr + ec->bo.batch.size) {
327 bo.addr = ec->bo.batch.addr;
328 bo.size = ec->bo.batch.size;
329 bo.map = ec->bo.batch.map;
330 } else if (address >= ec->bo.extra.addr && address < ec->bo.extra.addr + ec->bo.extra.size) {
331 bo.addr = ec->bo.extra.addr;
332 bo.size = ec->bo.extra.size;
333 bo.map = ec->bo.extra.map;
334 } else if (address >= ec->bo.data.addr && address < ec->bo.data.addr + ec->bo.data.size) {
335 bo.addr = ec->bo.data.addr;
336 bo.size = ec->bo.data.size;
337 bo.map = ec->bo.data.map;
338 }
339
340 return bo;
341 }
342
343 static unsigned
decode_get_state_size(void * _ec,uint64_t address,uint64_t base_address)344 decode_get_state_size(void *_ec, uint64_t address, uint64_t base_address)
345 {
346 return EXECUTOR_BO_SIZE;
347 }
348
349 static void
parse_execute_data(executor_context * ec,lua_State * L,int table_idx)350 parse_execute_data(executor_context *ec, lua_State *L, int table_idx)
351 {
352 uint32_t *data = ec->bo.data.map;
353
354 lua_pushvalue(L, table_idx);
355
356 lua_pushnil(L);
357 while (lua_next(L, -2) != 0) {
358 int val_idx = lua_gettop(L);
359 int key_idx = val_idx - 1;
360
361 if (lua_type(L, key_idx) != LUA_TNUMBER || !lua_isinteger(L, key_idx))
362 failf("invalid key for data in execute call");
363
364 lua_Integer key = lua_tointeger(L, key_idx);
365 assert(key <= 10 * 1024 * 1024 / 4);
366 lua_Integer val = lua_tointeger(L, val_idx);
367 data[key] = val;
368
369 lua_pop(L, 1);
370 }
371
372 lua_pop(L, 1);
373 }
374
375 static void
parse_execute_args(executor_context * ec,lua_State * L,executor_params * params)376 parse_execute_args(executor_context *ec, lua_State *L, executor_params *params)
377 {
378 int opts = lua_gettop(L);
379
380 lua_pushnil(L);
381
382 while (lua_next(L, opts) != 0) {
383 int val_idx = lua_gettop(L);
384 int key_idx = val_idx - 1;
385
386 if (lua_type(L, key_idx) != LUA_TSTRING) {
387 lua_pop(L, 1);
388 continue;
389 }
390
391 const char *key = lua_tostring(L, key_idx);
392
393 if (!strcmp(key, "src")) {
394 params->original_src = ralloc_strdup(ec->mem_ctx, luaL_checkstring(L, val_idx));
395 } else if (!strcmp(key, "data")) {
396 parse_execute_data(ec, L, val_idx);
397 } else {
398 failf("unknown parameter '%s' for execute()", key);
399 }
400
401 lua_pop(L, 1);
402 }
403 }
404
/* Create the kernel-side execution objects for the running KMD (an i915
 * context, or a Xe VM plus exec queue on a render engine), then create the
 * three BOs and fill the data BO with a recognizable poison pattern.
 */
static void
executor_context_setup(executor_context *ec)
{
   if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
      struct drm_i915_gem_context_create create = {0};
      int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
      if (err)
         failf("i915_gem_context_create");
      ec->i915.ctx_id = create.ctx_id;
   } else {
      assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);

      struct drm_xe_vm_create create = {
         .flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
      };
      int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_CREATE, &create);
      if (err)
         failf("xe_vm_create");
      ec->xe.vm_id = create.vm_id;

      struct drm_xe_engine_class_instance instance = {0};

      /* Pick the first render-class engine reported by the kernel. */
      struct intel_query_engine_info *engines_info = xe_engine_get_info(ec->fd);
      assert(engines_info);

      bool found_engine = false;
      for (int i = 0; i < engines_info->num_engines; i++) {
         struct intel_engine_class_instance *e = &engines_info->engines[i];
         if (e->engine_class == INTEL_ENGINE_CLASS_RENDER) {
            instance.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
            instance.engine_instance = e->engine_instance;
            instance.gt_id = e->gt_id;
            found_engine = true;
            break;
         }
      }
      assert(found_engine);
      free(engines_info);

      struct drm_xe_exec_queue_create queue_create = {
         .vm_id = ec->xe.vm_id,
         .width = 1,
         .num_placements = 1,
         .instances = (uintptr_t)&instance,
      };
      err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &queue_create);
      if (err)
         failf("xe_exec_queue_create");
      ec->xe.queue_id = queue_create.exec_queue_id;
   }

   executor_create_bo(ec, &ec->bo.batch, EXECUTOR_BO_BATCH_ADDR, EXECUTOR_BO_SIZE);
   executor_create_bo(ec, &ec->bo.extra, EXECUTOR_BO_EXTRA_ADDR, EXECUTOR_BO_SIZE);
   executor_create_bo(ec, &ec->bo.data, EXECUTOR_BO_DATA_ADDR, EXECUTOR_BO_SIZE);

   /* Poison the data BO so untouched dwords are easy to spot in dumps. */
   uint32_t *data = ec->bo.data.map;
   for (int i = 0; i < EXECUTOR_BO_SIZE / 4; i++)
      data[i] = 0xABABABAB;
}
464
/* Submit the batch for execution and block until it finishes.  On i915 the
 * three BOs are pinned at their fixed addresses via execbuffer2 and completion
 * is observed with GEM_WAIT; on Xe the BOs are first bound into the VM (gated
 * by a syncobj) and completion is observed by waiting a second syncobj
 * signalled by the exec.
 */
static void
executor_context_dispatch(executor_context *ec)
{
   if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
      /* Pin every BO at its baked-in address; data is the only one the GPU
       * writes.
       */
      struct drm_i915_gem_exec_object2 objs[] = {
         {
            .handle = ec->bo.batch.handle,
            .offset = ec->bo.batch.addr,
            .flags = EXEC_OBJECT_PINNED,
         },
         {
            .handle = ec->bo.extra.handle,
            .offset = ec->bo.extra.addr,
            .flags = EXEC_OBJECT_PINNED,
         },
         {
            .handle = ec->bo.data.handle,
            .offset = ec->bo.data.addr,
            .flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE,
         },
      };

      struct drm_i915_gem_execbuffer2 exec = {0};
      exec.buffers_ptr = (uintptr_t)objs;
      exec.buffer_count = ARRAY_SIZE(objs);
      exec.batch_start_offset = ec->batch_start - ec->bo.batch.addr;
      exec.flags = I915_EXEC_BATCH_FIRST;
      exec.rsvd1 = ec->i915.ctx_id;

      int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
      if (err)
         failf("i915_gem_execbuffer2");

      /* Effectively wait forever for the batch BO to go idle. */
      struct drm_i915_gem_wait wait = {0};
      wait.bo_handle = ec->bo.batch.handle;
      wait.timeout_ns = INT64_MAX;

      err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
      if (err)
         failf("i915_gem_wait");
   } else {
      assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);

      /* First syncobj is signalled by the binding operation and waited by the
       * execution of the batch buffer.
       *
       * Second syncobj is signalled by the execution of batch buffer and
       * waited at the end.
       *
       * NOTE(review): the syncobjs are never destroyed after the wait —
       * harmless for this short-lived tool, but technically leaked.
       */
      uint32_t sync_handles[2] = {0};
      for (int i = 0; i < 2; i++) {
         struct drm_syncobj_create sync_create = {0};
         int err = intel_ioctl(ec->fd, DRM_IOCTL_SYNCOBJ_CREATE, &sync_create);
         if (err)
            failf("syncobj_create");
         sync_handles[i] = sync_create.handle;
      }

      struct drm_xe_vm_bind_op bind_ops[] = {
         {
            .op = DRM_XE_VM_BIND_OP_MAP,
            .obj = ec->bo.batch.handle,
            .addr = ec->bo.batch.addr,
            .range = EXECUTOR_BO_SIZE,
            .pat_index = ec->devinfo->pat.cached_coherent.index,
         },
         {
            .op = DRM_XE_VM_BIND_OP_MAP,
            .obj = ec->bo.extra.handle,
            .addr = ec->bo.extra.addr,
            .range = EXECUTOR_BO_SIZE,
            .pat_index = ec->devinfo->pat.cached_coherent.index,
         },
         {
            .op = DRM_XE_VM_BIND_OP_MAP,
            .obj = ec->bo.data.handle,
            .addr = ec->bo.data.addr,
            .range = EXECUTOR_BO_SIZE,
            .pat_index = ec->devinfo->pat.cached_coherent.index,
         },
      };

      struct drm_xe_sync bind_syncs[] = {
         {
            .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .handle = sync_handles[0],
            .flags = DRM_XE_SYNC_FLAG_SIGNAL,
         },
      };

      struct drm_xe_vm_bind bind = {
         .vm_id = ec->xe.vm_id,
         .num_binds = ARRAY_SIZE(bind_ops),
         .vector_of_binds = (uintptr_t)bind_ops,
         .num_syncs = 1,
         .syncs = (uintptr_t)bind_syncs,
      };

      int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_BIND, &bind);
      if (err)
         failf("xe_vm_bind");

      /* Exec waits on the bind (sync 0) and signals completion (sync 1). */
      struct drm_xe_sync exec_syncs[] = {
         {
            .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .handle = sync_handles[0],
         },
         {
            .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .handle = sync_handles[1],
            .flags = DRM_XE_SYNC_FLAG_SIGNAL,
         }
      };

      struct drm_xe_exec exec = {
         .exec_queue_id = ec->xe.queue_id,
         .num_batch_buffer = 1,
         .address = ec->batch_start,
         .num_syncs = 2,
         .syncs = (uintptr_t)exec_syncs,
      };
      err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC, &exec);
      if (err)
         failf("xe_exec");

      struct drm_syncobj_wait wait = {
         .count_handles = 1,
         .handles = (uintptr_t)&sync_handles[1],
         .timeout_nsec = INT64_MAX,
      };
      err = intel_ioctl(ec->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
      if (err)
         failf("syncobj_wait");
   }
}
600
601 static void
executor_context_teardown(executor_context * ec)602 executor_context_teardown(executor_context *ec)
603 {
604 executor_destroy_bo(ec, &ec->bo.batch);
605 executor_destroy_bo(ec, &ec->bo.extra);
606 executor_destroy_bo(ec, &ec->bo.data);
607
608 if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
609 struct drm_i915_gem_context_destroy destroy = {
610 .ctx_id = ec->i915.ctx_id,
611 };
612 int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
613 if (err)
614 failf("i915_gem_context_destroy");
615 } else {
616 assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
617
618 struct drm_xe_exec_queue_destroy queue_destroy = {
619 .exec_queue_id = ec->xe.queue_id,
620 };
621 int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &queue_destroy);
622 if (err)
623 failf("xe_exec_queue_destroy");
624
625 struct drm_xe_vm_destroy destroy = {
626 .vm_id = ec->xe.vm_id,
627 };
628 err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
629 if (err)
630 failf("xe_vm_destroy");
631 }
632 }
633
634 static int
l_execute(lua_State * L)635 l_execute(lua_State *L)
636 {
637 executor_context ec = {
638 .mem_ctx = ralloc_context(NULL),
639 .devinfo = &E.devinfo,
640 .isl_dev = &E.isl_dev,
641 .fd = E.fd,
642 };
643
644 executor_context_setup(&ec);
645
646 executor_params params = {0};
647
648 {
649 if (lua_gettop(L) != 1)
650 failf("execute() must have a single table argument");
651
652 parse_execute_args(&ec, L, ¶ms);
653
654 const char *src = executor_apply_macros(&ec, params.original_src);
655
656 FILE *f = fmemopen((void *)src, strlen(src), "r");
657
658 brw_assemble_flags flags = 0;
659
660 if (INTEL_DEBUG(DEBUG_CS)) {
661 printf("=== Processed assembly source ===\n"
662 "%s"
663 "=================================\n\n", src);
664 flags = BRW_ASSEMBLE_DUMP;
665 }
666
667 brw_assemble_result asm = brw_assemble(ec.mem_ctx, ec.devinfo, f, "", flags);
668 fclose(f);
669
670 if (!asm.bin)
671 failf("assembler failure");
672
673 params.kernel_bin = asm.bin;
674 params.kernel_size = asm.bin_size;
675 }
676
677 genX_call(emit_execute, &ec, ¶ms);
678
679 if (INTEL_DEBUG(DEBUG_BATCH)) {
680 struct intel_batch_decode_ctx decoder;
681 enum intel_batch_decode_flags flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
682 if (INTEL_DEBUG(DEBUG_COLOR))
683 flags |= INTEL_BATCH_DECODE_IN_COLOR;
684
685 intel_batch_decode_ctx_init_brw(&decoder, &E.isa, &E.devinfo, stdout,
686 flags, NULL, decode_get_bo, decode_get_state_size, &ec);
687
688 assert(ec.bo.batch.cursor > ec.bo.batch.map);
689 const int batch_offset = ec.batch_start - ec.bo.batch.addr;
690 const int batch_size = (ec.bo.batch.cursor - ec.bo.batch.map) - batch_offset;
691 assert(batch_offset < batch_size);
692
693 intel_print_batch(&decoder, ec.bo.batch.map, batch_size, ec.batch_start, false);
694
695 intel_batch_decode_ctx_finish(&decoder);
696 }
697
698 executor_context_dispatch(&ec);
699
700 {
701 /* TODO: Use userdata to return a wrapped C array instead of building
702 * values. Could make integration with array operations better.
703 */
704 uint32_t *data = ec.bo.data.map;
705 const int n = ec.bo.data.size / 4;
706 lua_createtable(L, n, 0);
707 for (int i = 0; i < 8; i++) {
708 lua_pushinteger(L, data[i]);
709 lua_seti(L, -2, i);
710 }
711 }
712
713 executor_context_teardown(&ec);
714 ralloc_free(ec.mem_ctx);
715
716 return 1;
717 }
718
719 static int
l_dump(lua_State * L)720 l_dump(lua_State *L)
721 {
722 /* TODO: Use a table to add options for the dump, e.g.
723 * starting offset, format, etc.
724 */
725
726 assert(lua_type(L, 1) == LUA_TTABLE);
727 assert(lua_type(L, 2) == LUA_TNUMBER);
728 assert(lua_isinteger(L, 2));
729
730 lua_Integer len_ = lua_tointeger(L, 2);
731 assert(len_ >= 0 && len_ <= INT_MAX);
732 int len = len_;
733
734 int i;
735 for (i = 0; i < len; i++) {
736 if (i%8 == 0) printf("[0x%08x]", i * 4);
737 lua_rawgeti(L, 1, i);
738 lua_Integer val = lua_tointeger(L, -1);
739 printf(" 0x%08x", (uint32_t)val);
740 lua_pop(L, 1);
741 if (i%8 == 7) printf("\n");
742 }
743 if (i%8 != 0) printf("\n");
744 return 0;
745 }
746
747 static int
l_check_ver(lua_State * L)748 l_check_ver(lua_State *L)
749 {
750 int top = lua_gettop(L);
751 for (int i = 1; i <= top; i++) {
752 lua_Integer v = luaL_checknumber(L, i);
753 if (E.devinfo.ver == v) {
754 return 0;
755 }
756 }
757 failf("script doesn't support version=%d verx10=%d\n",
758 E.devinfo.ver, E.devinfo.verx10);
759 return 0;
760 }
761
762 static int
l_check_verx10(lua_State * L)763 l_check_verx10(lua_State *L)
764 {
765 int top = lua_gettop(L);
766 for (int i = 1; i <= top; i++) {
767 lua_Integer v = luaL_checknumber(L, i);
768 if (E.devinfo.verx10 == v) {
769 return 0;
770 }
771 }
772 failf("script doesn't support version=%d verx10=%d\n",
773 E.devinfo.ver, E.devinfo.verx10);
774 return 0;
775 }
776
777 /* TODO: Review numeric limits in the code, specially around Lua integer
778 * conversion.
779 */
780
781 int
main(int argc,char * argv[])782 main(int argc, char *argv[])
783 {
784 if (argc < 2 ||
785 !strcmp(argv[1], "--help") ||
786 !strcmp(argv[1], "-help") ||
787 !strcmp(argv[1], "-h") ||
788 !strcmp(argv[1], "help")) {
789 print_help();
790 return 0;
791 }
792
793 if (argc > 2) {
794 /* TODO: Expose extra arguments to the script as a variable. */
795 failf("invalid extra arguments\nusage: executor FILENAME");
796 return 1;
797 }
798
799 process_intel_debug_variable();
800
801 E.fd = get_drm_device(&E.devinfo);
802 isl_device_init(&E.isl_dev, &E.devinfo);
803 brw_init_isa_info(&E.isa, &E.devinfo);
804 assert(E.devinfo.kmd_type == INTEL_KMD_TYPE_I915 ||
805 E.devinfo.kmd_type == INTEL_KMD_TYPE_XE);
806
807 lua_State *L = luaL_newstate();
808
809 /* TODO: Could be nice to export some kind of builder interface,
810 * maybe even let the script construct a shader at the BRW IR
811 * level and let the later passes kick in.
812 */
813
814 luaL_openlibs(L);
815
816 lua_pushinteger(L, E.devinfo.ver);
817 lua_setglobal(L, "ver");
818
819 lua_pushinteger(L, E.devinfo.verx10);
820 lua_setglobal(L, "verx10");
821
822 lua_pushcfunction(L, l_execute);
823 lua_setglobal(L, "execute");
824
825 lua_pushcfunction(L, l_dump);
826 lua_setglobal(L, "dump");
827
828 lua_pushcfunction(L, l_check_ver);
829 lua_setglobal(L, "check_ver");
830
831 lua_pushcfunction(L, l_check_verx10);
832 lua_setglobal(L, "check_verx10");
833
834 const char *filename = argv[1];
835 int err = luaL_loadfile(L, filename);
836 if (err)
837 failf("failed to load script: %s", lua_tostring(L, -1));
838
839 err = lua_pcall(L, 0, 0, 0);
840 if (err)
841 failf("failed to run script: %s", lua_tostring(L, -1));
842
843 lua_close(L);
844 close(E.fd);
845
846 return 0;
847 }
848
/* Print "ERROR: " plus the formatted message to stderr and terminate the
 * process with exit status 1.  Never returns.
 */
void
failf(const char *fmt, ...)
{
   va_list ap;

   fprintf(stderr, "ERROR: ");
   va_start(ap, fmt);
   vfprintf(stderr, fmt, ap);
   va_end(ap);
   fputc('\n', stderr);
   exit(1);
}
860