/*
 * Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

#include <lua.h>
#include <lualib.h>
#include <lauxlib.h>

#include "util/ralloc.h"

#include <xf86drm.h>
#include "drm-uapi/i915_drm.h"
#include "drm-uapi/xe_drm.h"

#include "intel/compiler/brw_asm.h"
#include "intel/compiler/brw_isa_info.h"
#include "intel/common/intel_gem.h"
#include "intel/common/xe/intel_engine.h"
#include "intel/decoder/intel_decoder.h"
#include "intel/dev/intel_debug.h"

#include "executor.h"

enum {
   /* Predictable base addresses here make it easier to spot errors. */
   EXECUTOR_BO_BATCH_ADDR = 0x10000000,
   EXECUTOR_BO_EXTRA_ADDR = 0x20000000,
   EXECUTOR_BO_DATA_ADDR  = 0x30000000,

   /* Applies to all BOs. */
   EXECUTOR_BO_SIZE = 10 * 1024 * 1024,
};

static void
print_help()
{
   printf(
      "Executes shaders written for Intel GPUs\n"
      "usage: executor FILENAME\n"
      "\n"
      "The input is a Lua script that can perform data manipulation\n"
      "and dispatch execution of compute shaders, written in Xe assembly,\n"
      "the same format used by the brw_asm assembler or when dumping\n"
      "shaders in debug mode.\n"
      "\n"
      "The goal is to have a tool to experiment directly with certain\n"
      "assembly instructions and the shared units without having to\n"
      "instrument the drivers.\n"
      "\n"
      "EXECUTION CONTEXT\n"
      "\n"
      "By default compute shaders are used with SIMD8 for Gfx9-125 and SIMD16\n"
      "for Xe2.  Only a single thread is dispatched.  A data buffer is used to\n"
      "pipe data into and out of the shader; it is bound to the graphics\n"
      "address 0x%08x.\n"
      "\n"
      "The Gfx versions have differences in their assembly and shared units, so\n"
      "other than very simple examples, scripts for this program will either be\n"
      "specific to a version or provide shader variants for multiple versions.\n"
      "\n"
      "ASSEMBLY MACROS\n"
      "\n"
      "In addition to regular instructions, the following macros will generate\n"
      "assembly code based on the Gfx version being executed.  Unlike in regular\n"
      "instructions, REGs don't use regions and can't be immediates.\n"
      "\n"
      "- @eot\n"
      "  Send an EOT message.\n"
      "\n"
      "- @mov REG IMM\n"
      "  Like a regular MOV but accepts numbers in both decimal and\n"
      "  floating-point form.\n"
      "\n"
      "- @id REG\n"
      "  Write the local invocation index into REG.\n"
      "\n"
      "- @read DST_REG OFFSET_REG\n"
      "  Read 32-bit values from the memory buffer at OFFSET_REG into DST_REG.\n"
      "\n"
      "- @write OFFSET_REG SRC_REG\n"
      "  Write 32-bit values from SRC_REG to the memory buffer at OFFSET_REG.\n"
      "\n"
      "- @syncnop\n"
      "  Produce a coarse-grained sync.nop (when applicable) to ensure data from\n"
      "  the macros above has been read/written.\n"
      "\n"
      "LUA ENVIRONMENT\n"
      "\n"
      "In addition to the regular Lua standard library, the following variables and\n"
      "functions are available.\n"
      "\n"
      "- execute({src=STR, data=ARRAY}) -> ARRAY\n"
      "  Takes a table as argument.  The 'src' in the table contains the shader to be\n"
      "  executed.  The 'data' argument will be used to fill the data buffer with 32-bit\n"
      "  values.  The function returns an ARRAY with the contents of the data buffer\n"
      "  after the shader completes.\n"
      "\n"
      "- dump(ARRAY, COUNT)\n"
      "  Pretty-print the first COUNT elements of an array of 32-bit values.\n"
      "\n"
      "- check_ver(V, ...), check_verx10(V, ...)\n"
      "  Exit if the Gfx version being executed isn't in the argument list.\n"
      "\n"
      "- ver, verx10\n"
      "  Variables containing the Gfx version being executed.\n"
      "\n"
      "This program was compiled with %s.\n"
      "\n"
      "ENVIRONMENT VARIABLES\n"
      "\n"
      "The following INTEL_DEBUG values (comma-separated) are used:\n"
      "\n"
      " - bat             Dumps the batch buffer.\n"
      " - color           Uses colors for the batch buffer dump.\n"
      " - cs              Dumps the source after macro processing and\n"
      "                   the final assembly.\n"
      "\n"
      "EXAMPLE\n"
      "\n"
      "The following script\n"
      "\n"
      "  local r = execute {\n"
      "    data={ [42] = 0x100 },\n"
      "    src=[[\n"
      "      @mov     g1      42\n"
      "      @read    g2      g1\n"
      "\n"
      "      @id      g3\n"
      "\n"
      "      add(8)   g4<1>UD  g2<8,8,1>UD  g3<8,8,1>UD  { align1 @1 1Q };\n"
      "\n"
      "      @write   g3       g4\n"
      "      @eot\n"
      "    ]]\n"
      "  }\n"
      "\n"
      "  dump(r, 4)\n"
      "\n"
      "will produce the following output\n"
      "\n"
      "   [0x00000000] 0x00000100 0x00000101 0x00000102 0x00000103\n"
      "\n"
      "More examples can be found in the examples/ directory in the source code.\n"
      "\n", EXECUTOR_BO_DATA_ADDR, LUA_RELEASE);
}

static struct {
   struct intel_device_info devinfo;
   struct isl_device isl_dev;
   struct brw_isa_info isa;
   int fd;
} E;

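/* Dispatch to the per-generation implementation of func based on the device's
 * verx10, e.g. genX_call(emit_execute, ...) expands to a call to
 * gfx125_emit_execute(...) on Gfx12.5.
 */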
#define genX_call(func, ...)                                \
   switch (E.devinfo.verx10) {                              \
   case 90:  gfx9_  ##func(__VA_ARGS__); break;             \
   case 110: gfx11_ ##func(__VA_ARGS__); break;             \
   case 120: gfx12_ ##func(__VA_ARGS__); break;             \
   case 125: gfx125_##func(__VA_ARGS__); break;             \
   case 200: gfx20_ ##func(__VA_ARGS__); break;             \
   case 300: gfx30_ ##func(__VA_ARGS__); break;             \
   default: unreachable("Unsupported hardware generation"); \
   }

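/* Create a BO of the given size and CPU-map it, using the i915 or Xe uAPI
 * depending on the kernel driver.  addr records the fixed GPU address the BO
 * will later be bound to at submission time.
 */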
static void
executor_create_bo(executor_context *ec, executor_bo *bo, uint64_t addr, uint32_t size_in_bytes)
{
   if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
      struct drm_i915_gem_create gem_create = {
         .size = size_in_bytes,
      };

      int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
      if (err)
         failf("i915_gem_create");

      struct drm_i915_gem_mmap_offset mm = {
         .handle = gem_create.handle,
         .flags  = ec->devinfo->has_local_mem ? I915_MMAP_OFFSET_FIXED
                                              : I915_MMAP_OFFSET_WC,
      };

      err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mm);
      if (err)
         failf("i915_gem_mmap_offset");

      bo->handle = gem_create.handle;
      bo->map    = mmap(NULL, size_in_bytes, PROT_READ | PROT_WRITE,
                        MAP_SHARED, ec->fd, mm.offset);
      if (bo->map == MAP_FAILED)
         failf("mmap");
   } else {
      assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);

      struct drm_xe_gem_create gem_create = {
         .size        = size_in_bytes,
         .cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
         .placement   = 1u << ec->devinfo->mem.sram.mem.instance,
      };

      int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
      if (err)
         failf("xe_gem_create");

      struct drm_xe_gem_mmap_offset mm = {
         .handle = gem_create.handle,
      };

      err = intel_ioctl(ec->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
      if (err)
         failf("xe_gem_mmap_offset");

      bo->handle = gem_create.handle;
      bo->map    = mmap(NULL, size_in_bytes, PROT_READ | PROT_WRITE,
                        MAP_SHARED, ec->fd, mm.offset);
      if (bo->map == MAP_FAILED)
         failf("mmap");
   }

   bo->size   = size_in_bytes;
   bo->addr   = addr;
   bo->cursor = bo->map;
}

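/* Unmap a BO and close its GEM handle. */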
static void
executor_destroy_bo(executor_context *ec, executor_bo *bo)
{
   struct drm_gem_close gem_close = {
      .handle = bo->handle,
   };

   int err = munmap(bo->map, bo->size);
   if (err)
      failf("munmap");

   err = intel_ioctl(ec->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
   if (err)
      failf("gem_close");

   memset(bo, 0, sizeof(*bo));
}

static void
executor_print_bo(executor_bo *bo, const char *name)
{
   assert((bo->cursor - bo->map) % 4 == 0);
   uint32_t *dw = bo->map;
   uint32_t len = (uint32_t *)bo->cursor - dw;

   printf("=== %s (0x%08"PRIx64", %td bytes) ===\n", name, bo->addr, bo->cursor - bo->map);

   for (int i = 0; i < len; i++) {
      if ((i % 8) == 0) printf("[0x%08x] ", (i*4) + (uint32_t)bo->addr);
      printf("0x%08x ", dw[i]);
      if ((i % 8) == 7) printf("\n");
   }
   printf("\n");
}

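/* Bump-allocate space from a BO's CPU mapping.  The cursor only moves
 * forward; there is no way to free.  A non-zero alignment is assumed to be a
 * power of two.
 */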
void *
executor_alloc_bytes(executor_bo *bo, uint32_t size)
{
   return executor_alloc_bytes_aligned(bo, size, 0);
}

void *
executor_alloc_bytes_aligned(executor_bo *bo, uint32_t size, uint32_t alignment)
{
   void *r = bo->cursor;
   if (alignment) {
      r = (void *)(((uintptr_t)r + alignment-1) & ~((uintptr_t)alignment-1));
   }
   bo->cursor = r + size;
   return r;
}

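/* Translate a pointer into a BO's CPU mapping to the corresponding GPU address. */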
executor_address
executor_address_of_ptr(executor_bo *bo, void *ptr)
{
   return (executor_address){ptr - bo->map + bo->addr};
}

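/* Open a render node for the first Intel device with a supported generation
 * (Gfx version 8 or newer) and fill devinfo.  Returns the DRM fd, or -1 if no
 * suitable device is found.
 */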
static int
get_drm_device(struct intel_device_info *devinfo)
{
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, 8);

   int i, fd = -1;
   for (i = 0; i < max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
         fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
         if (fd < 0)
            continue;

         if (!intel_get_device_info_from_fd(fd, devinfo, -1, -1) ||
             devinfo->ver < 8) {
            close(fd);
            fd = -1;
            continue;
         }

         /* Found a device! */
         break;
      }
   }
   drmFreeDevices(devices, max_devices);

   return fd;
}

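/* Callback used by the batch decoder to resolve a GPU address to the BO
 * (batch, extra or data) that contains it.
 */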
static struct intel_batch_decode_bo
decode_get_bo(void *_ec, bool ppgtt, uint64_t address)
{
   executor_context *ec = _ec;
   struct intel_batch_decode_bo bo = {0};

   if (address >= ec->bo.batch.addr && address < ec->bo.batch.addr + ec->bo.batch.size) {
      bo.addr = ec->bo.batch.addr;
      bo.size = ec->bo.batch.size;
      bo.map  = ec->bo.batch.map;
   } else if (address >= ec->bo.extra.addr && address < ec->bo.extra.addr + ec->bo.extra.size) {
      bo.addr = ec->bo.extra.addr;
      bo.size = ec->bo.extra.size;
      bo.map  = ec->bo.extra.map;
   } else if (address >= ec->bo.data.addr && address < ec->bo.data.addr + ec->bo.data.size) {
      bo.addr = ec->bo.data.addr;
      bo.size = ec->bo.data.size;
      bo.map  = ec->bo.data.map;
   }

   return bo;
}

static unsigned
decode_get_state_size(void *_ec, uint64_t address, uint64_t base_address)
{
   return EXECUTOR_BO_SIZE;
}

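/* Copy the integer-keyed entries of the Lua 'data' table into the data
 * buffer, one 32-bit value per key.
 */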
static void
parse_execute_data(executor_context *ec, lua_State *L, int table_idx)
{
   uint32_t *data = ec->bo.data.map;

   lua_pushvalue(L, table_idx);

   lua_pushnil(L);
   while (lua_next(L, -2) != 0) {
      int val_idx = lua_gettop(L);
      int key_idx = val_idx - 1;

      if (lua_type(L, key_idx) != LUA_TNUMBER || !lua_isinteger(L, key_idx))
         failf("invalid key for data in execute call");

      lua_Integer key = lua_tointeger(L, key_idx);
      assert(key >= 0 && key < EXECUTOR_BO_SIZE / 4);
      lua_Integer val = lua_tointeger(L, val_idx);
      data[key] = val;

      lua_pop(L, 1);
   }

   lua_pop(L, 1);
}

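/* Walk the table passed to execute(): 'src' provides the shader source
 * (duplicated into the executor's memory context) and 'data' seeds the data
 * buffer; any other key is an error.
 */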
static void
parse_execute_args(executor_context *ec, lua_State *L, executor_params *params)
{
   int opts = lua_gettop(L);

   lua_pushnil(L);

   while (lua_next(L, opts) != 0) {
      int val_idx = lua_gettop(L);
      int key_idx = val_idx - 1;

      if (lua_type(L, key_idx) != LUA_TSTRING) {
         lua_pop(L, 1);
         continue;
      }

      const char *key = lua_tostring(L, key_idx);

      if (!strcmp(key, "src")) {
         params->original_src = ralloc_strdup(ec->mem_ctx, luaL_checkstring(L, val_idx));
      } else if (!strcmp(key, "data")) {
         parse_execute_data(ec, L, val_idx);
      } else {
         failf("unknown parameter '%s' for execute()", key);
      }

      lua_pop(L, 1);
   }
}

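/* Create the kernel-side execution state: an i915 context, or an Xe VM plus
 * an exec queue on the render engine, then the batch/extra/data BOs.  The
 * data BO is filled with a 0xABABABAB pattern so untouched memory is easy to
 * spot.
 */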
static void
executor_context_setup(executor_context *ec)
{
   if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
      struct drm_i915_gem_context_create create = {0};
      int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
      if (err)
         failf("i915_gem_context_create");
      ec->i915.ctx_id = create.ctx_id;
   } else {
      assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);

      struct drm_xe_vm_create create = {
         .flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
      };
      int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_CREATE, &create);
      if (err)
         failf("xe_vm_create");
      ec->xe.vm_id = create.vm_id;

      struct drm_xe_engine_class_instance instance = {0};

      struct intel_query_engine_info *engines_info = xe_engine_get_info(ec->fd);
      assert(engines_info);

      bool found_engine = false;
      for (int i = 0; i < engines_info->num_engines; i++) {
         struct intel_engine_class_instance *e = &engines_info->engines[i];
         if (e->engine_class == INTEL_ENGINE_CLASS_RENDER) {
            instance.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
            instance.engine_instance = e->engine_instance;
            instance.gt_id = e->gt_id;
            found_engine = true;
            break;
         }
      }
      assert(found_engine);
      free(engines_info);

      struct drm_xe_exec_queue_create queue_create = {
         .vm_id          = ec->xe.vm_id,
         .width          = 1,
         .num_placements = 1,
         .instances      = (uintptr_t)&instance,
      };
      err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &queue_create);
      if (err)
         failf("xe_exec_queue_create");
      ec->xe.queue_id = queue_create.exec_queue_id;
   }

   executor_create_bo(ec, &ec->bo.batch, EXECUTOR_BO_BATCH_ADDR, EXECUTOR_BO_SIZE);
   executor_create_bo(ec, &ec->bo.extra, EXECUTOR_BO_EXTRA_ADDR, EXECUTOR_BO_SIZE);
   executor_create_bo(ec, &ec->bo.data,  EXECUTOR_BO_DATA_ADDR, EXECUTOR_BO_SIZE);

   uint32_t *data = ec->bo.data.map;
   for (int i = 0; i < EXECUTOR_BO_SIZE / 4; i++)
      data[i] = 0xABABABAB;
}

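/* Submit the batch buffer and block until it completes.  On i915 the three
 * BOs are pinned at their fixed addresses via execbuffer2 and completion uses
 * GEM_WAIT; on Xe they are bound with VM_BIND and completion is tracked with
 * syncobjs.
 */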
static void
executor_context_dispatch(executor_context *ec)
{
   if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
      struct drm_i915_gem_exec_object2 objs[] = {
         {
            .handle = ec->bo.batch.handle,
            .offset = ec->bo.batch.addr,
            .flags  = EXEC_OBJECT_PINNED,
         },
         {
            .handle = ec->bo.extra.handle,
            .offset = ec->bo.extra.addr,
            .flags  = EXEC_OBJECT_PINNED,
         },
         {
            .handle = ec->bo.data.handle,
            .offset = ec->bo.data.addr,
            .flags  = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE,
         },
      };

      struct drm_i915_gem_execbuffer2 exec = {0};
      exec.buffers_ptr = (uintptr_t)objs;
      exec.buffer_count = ARRAY_SIZE(objs);
      exec.batch_start_offset = ec->batch_start - ec->bo.batch.addr;
      exec.flags = I915_EXEC_BATCH_FIRST;
      exec.rsvd1 = ec->i915.ctx_id;

      int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
      if (err)
         failf("i915_gem_execbuffer2");

      struct drm_i915_gem_wait wait = {0};
      wait.bo_handle = ec->bo.batch.handle;
      wait.timeout_ns = INT64_MAX;

      err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
      if (err)
         failf("i915_gem_wait");
   } else {
      assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);

      /* The first syncobj is signalled by the binding operation and waited on
       * by the execution of the batch buffer.
       *
       * The second syncobj is signalled by the execution of the batch buffer
       * and waited on at the end.
       */
      uint32_t sync_handles[2] = {0};
      for (int i = 0; i < 2; i++) {
         struct drm_syncobj_create sync_create = {0};
         int err = intel_ioctl(ec->fd, DRM_IOCTL_SYNCOBJ_CREATE, &sync_create);
         if (err)
            failf("syncobj_create");
         sync_handles[i] = sync_create.handle;
      }

      struct drm_xe_vm_bind_op bind_ops[] = {
         {
            .op        = DRM_XE_VM_BIND_OP_MAP,
            .obj       = ec->bo.batch.handle,
            .addr      = ec->bo.batch.addr,
            .range     = EXECUTOR_BO_SIZE,
            .pat_index = ec->devinfo->pat.cached_coherent.index,
         },
         {
            .op        = DRM_XE_VM_BIND_OP_MAP,
            .obj       = ec->bo.extra.handle,
            .addr      = ec->bo.extra.addr,
            .range     = EXECUTOR_BO_SIZE,
            .pat_index = ec->devinfo->pat.cached_coherent.index,
         },
         {
            .op        = DRM_XE_VM_BIND_OP_MAP,
            .obj       = ec->bo.data.handle,
            .addr      = ec->bo.data.addr,
            .range     = EXECUTOR_BO_SIZE,
            .pat_index = ec->devinfo->pat.cached_coherent.index,
         },
      };

      struct drm_xe_sync bind_syncs[] = {
         {
            .type   = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .handle = sync_handles[0],
            .flags  = DRM_XE_SYNC_FLAG_SIGNAL,
         },
      };

      struct drm_xe_vm_bind bind = {
         .vm_id           = ec->xe.vm_id,
         .num_binds       = ARRAY_SIZE(bind_ops),
         .vector_of_binds = (uintptr_t)bind_ops,
         .num_syncs       = 1,
         .syncs           = (uintptr_t)bind_syncs,
      };

      int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_BIND, &bind);
      if (err)
         failf("xe_vm_bind");

      struct drm_xe_sync exec_syncs[] = {
         {
            .type   = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .handle = sync_handles[0],
         },
         {
            .type   = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .handle = sync_handles[1],
            .flags  = DRM_XE_SYNC_FLAG_SIGNAL,
         }
      };

      struct drm_xe_exec exec = {
         .exec_queue_id    = ec->xe.queue_id,
         .num_batch_buffer = 1,
         .address          = ec->batch_start,
         .num_syncs        = 2,
         .syncs            = (uintptr_t)exec_syncs,
      };
      err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC, &exec);
      if (err)
         failf("xe_exec");

      struct drm_syncobj_wait wait = {
         .count_handles = 1,
         .handles       = (uintptr_t)&sync_handles[1],
         .timeout_nsec  = INT64_MAX,
      };
      err = intel_ioctl(ec->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
      if (err)
         failf("syncobj_wait");
   }
}

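/* Release the BOs and the i915 context or Xe exec queue and VM. */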
static void
executor_context_teardown(executor_context *ec)
{
   executor_destroy_bo(ec, &ec->bo.batch);
   executor_destroy_bo(ec, &ec->bo.extra);
   executor_destroy_bo(ec, &ec->bo.data);

   if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
      struct drm_i915_gem_context_destroy destroy = {
         .ctx_id = ec->i915.ctx_id,
      };
      int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
      if (err)
         failf("i915_gem_context_destroy");
   } else {
      assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);

      struct drm_xe_exec_queue_destroy queue_destroy = {
         .exec_queue_id = ec->xe.queue_id,
      };
      int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &queue_destroy);
      if (err)
         failf("xe_exec_queue_destroy");

      struct drm_xe_vm_destroy destroy = {
         .vm_id = ec->xe.vm_id,
      };
      err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
      if (err)
         failf("xe_vm_destroy");
   }
}

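/* Implementation of the Lua execute() function: assemble the shader source,
 * emit and optionally decode the batch, submit it, and return the contents of
 * the data buffer as a Lua table.
 */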
static int
l_execute(lua_State *L)
{
   executor_context ec = {
      .mem_ctx = ralloc_context(NULL),
      .devinfo = &E.devinfo,
      .isl_dev = &E.isl_dev,
      .fd      = E.fd,
   };

   executor_context_setup(&ec);

   executor_params params = {0};

   {
      if (lua_gettop(L) != 1)
         failf("execute() must have a single table argument");

      parse_execute_args(&ec, L, &params);

      const char *src = executor_apply_macros(&ec, params.original_src);

      FILE *f = fmemopen((void *)src, strlen(src), "r");

      brw_assemble_flags flags = 0;

      if (INTEL_DEBUG(DEBUG_CS)) {
         printf("=== Processed assembly source ===\n"
                "%s"
                "=================================\n\n", src);
         flags = BRW_ASSEMBLE_DUMP;
      }

      brw_assemble_result asm = brw_assemble(ec.mem_ctx, ec.devinfo, f, "", flags);
      fclose(f);

      if (!asm.bin)
         failf("assembler failure");

      params.kernel_bin = asm.bin;
      params.kernel_size = asm.bin_size;
   }

   genX_call(emit_execute, &ec, &params);

   if (INTEL_DEBUG(DEBUG_BATCH)) {
      struct intel_batch_decode_ctx decoder;
      enum intel_batch_decode_flags flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
      if (INTEL_DEBUG(DEBUG_COLOR))
         flags |= INTEL_BATCH_DECODE_IN_COLOR;

      intel_batch_decode_ctx_init_brw(&decoder, &E.isa, &E.devinfo, stdout,
                                      flags, NULL, decode_get_bo, decode_get_state_size, &ec);

      assert(ec.bo.batch.cursor > ec.bo.batch.map);
      const int batch_offset = ec.batch_start - ec.bo.batch.addr;
      const int batch_size = (ec.bo.batch.cursor - ec.bo.batch.map) - batch_offset;
      assert(batch_offset < batch_size);

      intel_print_batch(&decoder, ec.bo.batch.map, batch_size, ec.batch_start, false);

      intel_batch_decode_ctx_finish(&decoder);
   }

   executor_context_dispatch(&ec);

   {
      /* TODO: Use userdata to return a wrapped C array instead of building
       * values.  Could make integration with array operations better.
       */
      uint32_t *data = ec.bo.data.map;
      const int n = ec.bo.data.size / 4;
      lua_createtable(L, n, 0);
      for (int i = 0; i < n; i++) {
         lua_pushinteger(L, data[i]);
         lua_seti(L, -2, i);
      }
   }

   executor_context_teardown(&ec);
   ralloc_free(ec.mem_ctx);

   return 1;
}

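/* Implementation of the Lua dump() function: pretty-print the first COUNT
 * 32-bit values of an array, eight per row.
 */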
static int
l_dump(lua_State *L)
{
   /* TODO: Use a table to add options for the dump, e.g.
    * starting offset, format, etc.
    */

   assert(lua_type(L, 1) == LUA_TTABLE);
   assert(lua_type(L, 2) == LUA_TNUMBER);
   assert(lua_isinteger(L, 2));

   lua_Integer len_ = lua_tointeger(L, 2);
   assert(len_ >= 0 && len_ <= INT_MAX);
   int len = len_;

   int i;
   for (i = 0; i < len; i++) {
      if (i%8 == 0) printf("[0x%08x]", i * 4);
      lua_rawgeti(L, 1, i);
      lua_Integer val = lua_tointeger(L, -1);
      printf(" 0x%08x", (uint32_t)val);
      lua_pop(L, 1);
      if (i%8 == 7) printf("\n");
   }
   if (i%8 != 0) printf("\n");
   return 0;
}

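/* Implementations of the Lua check_ver()/check_verx10() functions: exit with
 * an error unless the running device's version is among the arguments.
 */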
static int
l_check_ver(lua_State *L)
{
   int top = lua_gettop(L);
   for (int i = 1; i <= top; i++) {
      lua_Integer v = luaL_checknumber(L, i);
      if (E.devinfo.ver == v) {
         return 0;
      }
   }
   failf("script doesn't support version=%d verx10=%d",
         E.devinfo.ver, E.devinfo.verx10);
   return 0;
}

static int
l_check_verx10(lua_State *L)
{
   int top = lua_gettop(L);
   for (int i = 1; i <= top; i++) {
      lua_Integer v = luaL_checknumber(L, i);
      if (E.devinfo.verx10 == v) {
         return 0;
      }
   }
   failf("script doesn't support version=%d verx10=%d",
         E.devinfo.ver, E.devinfo.verx10);
   return 0;
}

/* TODO: Review numeric limits in the code, especially around Lua integer
 * conversion.
 */

int
main(int argc, char *argv[])
{
   if (argc < 2 ||
       !strcmp(argv[1], "--help") ||
       !strcmp(argv[1], "-help") ||
       !strcmp(argv[1], "-h") ||
       !strcmp(argv[1], "help")) {
      print_help();
      return 0;
   }

   if (argc > 2) {
      /* TODO: Expose extra arguments to the script as a variable. */
      failf("invalid extra arguments\nusage: executor FILENAME");
      return 1;
   }

   process_intel_debug_variable();

   E.fd = get_drm_device(&E.devinfo);
   if (E.fd < 0)
      failf("no supported Intel GPU found");

   isl_device_init(&E.isl_dev, &E.devinfo);
   brw_init_isa_info(&E.isa, &E.devinfo);
   assert(E.devinfo.kmd_type == INTEL_KMD_TYPE_I915 ||
          E.devinfo.kmd_type == INTEL_KMD_TYPE_XE);

   lua_State *L = luaL_newstate();

   /* TODO: Could be nice to export some kind of builder interface,
    * maybe even let the script construct a shader at the BRW IR
    * level and let the later passes kick in.
    */

   luaL_openlibs(L);

   lua_pushinteger(L, E.devinfo.ver);
   lua_setglobal(L, "ver");

   lua_pushinteger(L, E.devinfo.verx10);
   lua_setglobal(L, "verx10");

   lua_pushcfunction(L, l_execute);
   lua_setglobal(L, "execute");

   lua_pushcfunction(L, l_dump);
   lua_setglobal(L, "dump");

   lua_pushcfunction(L, l_check_ver);
   lua_setglobal(L, "check_ver");

   lua_pushcfunction(L, l_check_verx10);
   lua_setglobal(L, "check_verx10");

   const char *filename = argv[1];
   int err = luaL_loadfile(L, filename);
   if (err)
      failf("failed to load script: %s", lua_tostring(L, -1));

   err = lua_pcall(L, 0, 0, 0);
   if (err)
      failf("failed to run script: %s", lua_tostring(L, -1));

   lua_close(L);
   close(E.fd);

   return 0;
}

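/* Print an error message to stderr and terminate the program. */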
void
failf(const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   fprintf(stderr, "ERROR: ");
   vfprintf(stderr, fmt, args);
   fprintf(stderr, "\n");
   va_end(args);
   exit(1);
}