1 /*
2 * Copyright © 2021 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_compiler.h"
25 #include "brw_kernel.h"
26 #include "common/intel_disasm.h"
27 #include "compiler/clc/clc.h"
28 #include "compiler/glsl_types.h"
29 #include "dev/intel_debug.h"
30 #include "util/build_id.h"
31 #include "util/disk_cache.h"
32 #include "util/macros.h"
33 #include "util/mesa-sha1.h"
34 #include "util/u_dynarray.h"
35
36 #include <errno.h>
37 #include <fcntl.h>
38 #include <getopt.h>
39 #include <inttypes.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <sys/mman.h>
43
44 /* Shader functions */
45 #define SPIR_V_MAGIC_NUMBER 0x07230203
46
/**
 * Create the disk cache handed to the kernel compiler.
 *
 * The cache identity is built from three pieces: a renderer name of the form
 * "brw_clc_<pci id>", the formatted SHA-1 of this binary's build-id, and the
 * value returned by brw_get_compiler_config_value().
 *
 * The process is aborted when no build-id note can be found or when the
 * build-id is shorter than 20 bytes.
 *
 * \param compiler  Compiler whose device info and configuration key the cache.
 * \return the result of disk_cache_create(), or NULL when the file is built
 *         without ENABLE_SHADER_CACHE.
 */
static struct disk_cache *
get_disk_cache(struct brw_compiler *compiler)
{
#ifdef ENABLE_SHADER_CACHE
   /* "brw_clc_" (8 chars) + 4 hex digits + NUL fits with one byte to spare. */
   char renderer_name[14];
   ASSERTED int written = snprintf(renderer_name, sizeof(renderer_name),
                                   "brw_clc_%04x",
                                   compiler->devinfo->pci_device_id);
   assert(written == sizeof(renderer_name) - 2);

   /* Look up the build-id note of the binary containing this function. */
   const struct build_id_note *id_note =
      build_id_find_nhdr_for_addr(get_disk_cache);
   if (!id_note) {
      fprintf(stderr, "Failed to find build-id\n");
      abort();
   }

   const unsigned id_len = build_id_length(id_note);
   if (id_len < 20) {
      fprintf(stderr, "build-id too short. It needs to be a SHA\n");
      abort();
   }

   /* Hash the build-id and use the formatted digest as the cache timestamp. */
   uint8_t digest[20];
   struct mesa_sha1 hash_ctx;
   _mesa_sha1_init(&hash_ctx);
   _mesa_sha1_update(&hash_ctx, build_id_data(id_note), id_len);
   _mesa_sha1_final(&hash_ctx, digest);

   char digest_str[41];
   _mesa_sha1_format(digest_str, digest);

   const uint64_t config_flags = brw_get_compiler_config_value(compiler);

   return disk_cache_create(renderer_name, digest_str, config_flags);
#else
   return NULL;
#endif
}
84
/**
 * printf-style log hook that forwards the formatted message to stderr.
 *
 * \param data  Unused.
 * \param id    Unused.
 * \param fmt   printf-style format string, followed by its arguments.
 */
static void
compiler_log(void *data, unsigned *id, const char *fmt, ...)
{
   (void)data;
   (void)id;

   va_list ap;

   va_start(ap, fmt);
   vfprintf(stderr, fmt, ap);
   va_end(ap);
}
93
/**
 * Logger callback that writes a message to stderr exactly as given (no
 * newline is appended).
 *
 * \param priv  Unused.
 * \param msg   NUL-terminated message text.
 */
static void
msg_callback(void *priv, const char *msg)
{
   (void)priv;
   fputs(msg, stderr);
}
100
/**
 * Emit a C array definition "static const uint32_t <prefix>_<arr_name>[]"
 * holding the given data as 8-digit hexadecimal words, four words per line.
 *
 * \param fp        Stream the C source is written to.
 * \param prefix    First half of the generated array name.
 * \param arr_name  Second half of the generated array name.
 * \param data      Words to emit.
 * \param len       Size of \p data in bytes; must be a multiple of 4.
 */
static void
print_u32_data(FILE *fp, const char *prefix, const char *arr_name,
               const uint32_t *data, size_t len)
{
   assert(len % 4 == 0);

   const size_t dword_count = len / 4;

   fprintf(fp, "static const uint32_t %s_%s[] = {", prefix, arr_name);

   for (unsigned dw = 0; dw < dword_count; dw++) {
      /* Start a fresh output line before every fourth word. */
      if ((dw % 4) == 0)
         fputs("\n ", fp);

      fprintf(fp, " 0x%08" PRIx32 ",", data[dw]);
   }

   fputs("\n};\n", fp);
}
115
116 static const char *
reloc_type_str(enum brw_shader_reloc_type type)117 reloc_type_str(enum brw_shader_reloc_type type)
118 {
119 switch (type) {
120 #define CASE(e) case e: return #e;
121 CASE(BRW_SHADER_RELOC_TYPE_U32)
122 CASE(BRW_SHADER_RELOC_TYPE_MOV_IMM)
123 #undef CASE
124 default:
125 unreachable("Unknown relocation type");
126 }
127 }
128
129 static void
print_cs_prog_data_fields(FILE * fp,const char * prefix,const char * pad,const struct brw_cs_prog_data * cs_prog_data)130 print_cs_prog_data_fields(FILE *fp, const char *prefix, const char *pad,
131 const struct brw_cs_prog_data *cs_prog_data)
132 {
133 #define PROG_DATA_FIELD(fmt, field) \
134 fprintf(fp, "%s." #field " = " fmt ",\n", pad, cs_prog_data->field)
135
136 #define PROG_DATA_BOOL_FIELD(field) \
137 fprintf(fp, "%s." #field " = %s,\n", pad, \
138 cs_prog_data->field ? "true" : "false")
139
140 PROG_DATA_FIELD("%u", base.nr_params);
141 assert(cs_prog_data->base.stage == MESA_SHADER_COMPUTE);
142 fprintf(fp, "%s.base.stage = MESA_SHADER_COMPUTE,\n", pad);
143 assert(cs_prog_data->base.zero_push_reg == 0);
144 assert(cs_prog_data->base.push_reg_mask_param == 0);
145 PROG_DATA_FIELD("%u", base.curb_read_length);
146 PROG_DATA_FIELD("%u", base.total_scratch);
147 PROG_DATA_FIELD("%u", base.total_shared);
148 PROG_DATA_FIELD("%u", base.program_size);
149 PROG_DATA_FIELD("%u", base.const_data_size);
150 PROG_DATA_FIELD("%u", base.const_data_offset);
151 PROG_DATA_FIELD("%u", base.num_relocs);
152 fprintf(fp, "%s.base.relocs = %s_relocs,\n", pad, prefix);
153 assert(!cs_prog_data->base.has_ubo_pull);
154 assert(cs_prog_data->base.dispatch_grf_start_reg == 0);
155 assert(!cs_prog_data->base.use_alt_mode);
156 assert(cs_prog_data->base.param == 0);
157 PROG_DATA_BOOL_FIELD(base.uses_atomic_load_store);
158 fprintf(fp, "%s.local_size = { %u, %u, %u },\n", pad,
159 cs_prog_data->local_size[0],
160 cs_prog_data->local_size[1],
161 cs_prog_data->local_size[2]);
162 fprintf(fp, "%s.prog_offset = { %u, %u, %u },\n", pad,
163 cs_prog_data->prog_offset[0],
164 cs_prog_data->prog_offset[1],
165 cs_prog_data->prog_offset[2]);
166 PROG_DATA_FIELD("%u", prog_mask);
167 PROG_DATA_FIELD("%u", prog_spilled);
168 PROG_DATA_BOOL_FIELD(uses_barrier);
169 PROG_DATA_BOOL_FIELD(uses_num_work_groups);
170 assert(!cs_prog_data->uses_inline_data);
171 assert(!cs_prog_data->uses_btd_stack_ids);
172 PROG_DATA_FIELD("%u", push.per_thread.dwords);
173 PROG_DATA_FIELD("%u", push.per_thread.regs);
174 PROG_DATA_FIELD("%u", push.per_thread.size);
175 PROG_DATA_FIELD("%u", push.cross_thread.dwords);
176 PROG_DATA_FIELD("%u", push.cross_thread.regs);
177 PROG_DATA_FIELD("%u", push.cross_thread.size);
178
179 #undef PROG_DATA_FIELD
180 #undef PROG_DATA_BOOL_FIELD
181 }
182
183 static void
print_kernel(FILE * fp,const char * prefix,const struct brw_kernel * kernel,const struct brw_isa_info * isa)184 print_kernel(FILE *fp, const char *prefix,
185 const struct brw_kernel *kernel,
186 const struct brw_isa_info *isa)
187 {
188 struct mesa_sha1 sha1_ctx;
189 _mesa_sha1_init(&sha1_ctx);
190
191 #define SHA1_UPDATE_VALUE(val) \
192 _mesa_sha1_update(&sha1_ctx, &val, sizeof(val))
193
194 fprintf(fp, "#include \"intel/compiler/brw_kernel.h\"\n");
195 fprintf(fp, "\n");
196
197 fprintf(fp, "static const struct brw_shader_reloc %s_relocs[] = {\n",
198 prefix);
199 for (unsigned i = 0; i < kernel->prog_data.base.num_relocs; i++) {
200 const struct brw_shader_reloc *reloc = &kernel->prog_data.base.relocs[i];
201 fprintf(fp, " { %"PRIu32", %s, %"PRIu32", %"PRIu32" },\n",
202 reloc->id, reloc_type_str(reloc->type),
203 reloc->offset, reloc->delta);
204 }
205 fprintf(fp, "};\n");
206 _mesa_sha1_update(&sha1_ctx, kernel->prog_data.base.relocs,
207 kernel->prog_data.base.num_relocs *
208 sizeof(kernel->prog_data.base.relocs[0]));
209
210 /* Get rid of the pointers before we hash */
211 struct brw_cs_prog_data cs_prog_data = kernel->prog_data;
212 cs_prog_data.base.relocs = NULL;
213 assert(cs_prog_data.base.param == NULL);
214 _mesa_sha1_update(&sha1_ctx, &cs_prog_data, sizeof(cs_prog_data));
215
216 SHA1_UPDATE_VALUE(kernel->args_size);
217 SHA1_UPDATE_VALUE(kernel->arg_count);
218 _mesa_sha1_update(&sha1_ctx, kernel->args,
219 kernel->arg_count * sizeof(kernel->args[0]));
220
221 fprintf(fp, "static const struct brw_kernel_arg_desc %s_args[] = {\n",
222 prefix);
223 for (unsigned i = 0; i < kernel->arg_count; i++) {
224 fprintf(fp, " { %d, %d },\n",
225 kernel->args[i].offset, kernel->args[i].size);
226 }
227 fprintf(fp, "};\n\n");
228
229 _mesa_sha1_update(&sha1_ctx, kernel->code,
230 kernel->prog_data.base.program_size);
231
232 fprintf(fp, "#if 0 /* BEGIN KERNEL ASSEMBLY */\n");
233 fprintf(fp, "\n");
234 intel_disassemble(isa, kernel->code, 0, fp);
235 fprintf(fp, "\n");
236 fprintf(fp, "#endif /* END KERNEL ASSEMBLY */\n");
237 print_u32_data(fp, prefix, "code", kernel->code,
238 kernel->prog_data.base.program_size);
239
240 fprintf(fp, "static const struct brw_kernel %s = {\n", prefix);
241 fprintf(fp, " .prog_data = {\n");
242 print_cs_prog_data_fields(fp, prefix, " ", &kernel->prog_data);
243 fprintf(fp, " },\n");
244 fprintf(fp, " .args_size = %d,\n", (int)kernel->args_size);
245 fprintf(fp, " .arg_count = %d,\n", (int)kernel->arg_count);
246 fprintf(fp, " .args = %s_args,\n", prefix);
247 fprintf(fp, " .code = %s_code,\n", prefix);
248 fprintf(fp, "};\n");
249
250 unsigned char sha1[20];
251 _mesa_sha1_final(&sha1_ctx, sha1);
252 char sha1_str[41];
253 _mesa_sha1_format(sha1_str, sha1);
254 fprintf(fp, "const char *%s_sha1 = \"%s\";\n", prefix, sha1_str);
255 }
256
/**
 * Print the command-line help text.
 *
 * The option letters listed here match what main() hands to getopt_long():
 * the output file is selected with -o/--out and -i/--info requests the
 * kernel information dump.
 *
 * \param exec_name  Program name shown in the "Usage:" line.
 * \param f          Stream the help text is written to.
 */
static void
print_usage(char *exec_name, FILE *f)
{
   fprintf(f,
"Usage: %s [options] [clang args | input file]\n"
"Options:\n"
" -h --help Print this help.\n"
" -e, --entrypoint <name> Specify the entry-point name.\n"
" -p, --platform <name> Specify the target platform name.\n"
" --prefix <prefix> Prefix for variable names in generated C code.\n"
" -o, --out <filename> Specify the output filename.\n"
" -s, --spv <filename> Specify the output filename for spirv.\n"
" -i, --info Print information about the compiled kernel.\n"
   , exec_name);
}
271
272 #define OPT_PREFIX 1000
273
274 static uint32_t
get_module_spirv_version(const uint32_t * spirv,size_t size)275 get_module_spirv_version(const uint32_t *spirv, size_t size)
276 {
277 assert(size >= 8);
278 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
279 return spirv[1];
280 }
281
282 static void
set_module_spirv_version(uint32_t * spirv,size_t size,uint32_t version)283 set_module_spirv_version(uint32_t *spirv, size_t size, uint32_t version)
284 {
285 assert(size >= 8);
286 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
287 spirv[1] = version;
288 }
289
main(int argc,char ** argv)290 int main(int argc, char **argv)
291 {
292 brw_process_intel_debug_variable();
293
294 static struct option long_options[] ={
295 {"help", no_argument, 0, 'h'},
296 {"entrypoint", required_argument, 0, 'e'},
297 {"platform", required_argument, 0, 'p'},
298 {"prefix", required_argument, 0, OPT_PREFIX},
299 {"in", required_argument, 0, 'i'},
300 {"out", required_argument, 0, 'o'},
301 {"spv", required_argument, 0, 's'},
302 {"info", no_argument, 0, 'i'},
303 {0, 0, 0, 0}
304 };
305
306 char *entry_point = NULL, *platform = NULL, *outfile = NULL, *spv_outfile = NULL, *prefix = NULL;
307 struct util_dynarray clang_args;
308 struct util_dynarray input_files;
309 struct util_dynarray spirv_objs;
310 struct util_dynarray spirv_ptr_objs;
311 bool print_info = false;
312
313 void *mem_ctx = ralloc_context(NULL);
314
315 util_dynarray_init(&clang_args, mem_ctx);
316 util_dynarray_init(&input_files, mem_ctx);
317 util_dynarray_init(&spirv_objs, mem_ctx);
318 util_dynarray_init(&spirv_ptr_objs, mem_ctx);
319
320 int ch;
321 while ((ch = getopt_long(argc, argv, "he:p:s:o:i", long_options, NULL)) != -1)
322 {
323 switch (ch)
324 {
325 case 'h':
326 print_usage(argv[0], stdout);
327 return 0;
328 case 'e':
329 entry_point = optarg;
330 break;
331 case 'p':
332 platform = optarg;
333 break;
334 case 'o':
335 outfile = optarg;
336 break;
337 case 's':
338 spv_outfile = optarg;
339 break;
340 case 'i':
341 print_info = true;
342 break;
343 case OPT_PREFIX:
344 prefix = optarg;
345 break;
346 default:
347 fprintf(stderr, "Unrecognized option \"%s\".\n", optarg);
348 print_usage(argv[0], stderr);
349 return 1;
350 }
351 }
352
353 for (int i = optind; i < argc; i++) {
354 if (argv[i][0] == '-')
355 util_dynarray_append(&clang_args, char *, argv[i]);
356 else
357 util_dynarray_append(&input_files, char *, argv[i]);
358 }
359
360 if (util_dynarray_num_elements(&input_files, char *) == 0) {
361 fprintf(stderr, "No input file(s).\n");
362 print_usage(argv[0], stderr);
363 return -1;
364 }
365
366 if (platform == NULL) {
367 fprintf(stderr, "No target platform name specified.\n");
368 print_usage(argv[0], stderr);
369 return -1;
370 }
371
372 int pci_id = intel_device_name_to_pci_device_id(platform);
373 if (pci_id < 0) {
374 fprintf(stderr, "Invalid target platform name: %s\n", platform);
375 return -1;
376 }
377
378 struct intel_device_info _devinfo, *devinfo = &_devinfo;
379 if (!intel_get_device_info_from_pci_id(pci_id, devinfo)) {
380 fprintf(stderr, "Failed to get device information.\n");
381 return -1;
382 }
383
384 if (devinfo->verx10 < 125) {
385 fprintf(stderr, "Platform currently not supported.\n");
386 return -1;
387 }
388
389 struct brw_isa_info _isa, *isa = &_isa;
390 brw_init_isa_info(isa, devinfo);
391
392 if (entry_point == NULL) {
393 fprintf(stderr, "No entry-point name specified.\n");
394 print_usage(argv[0], stderr);
395 return -1;
396 }
397
398 struct clc_logger logger = {
399 .error = msg_callback,
400 .warning = msg_callback,
401 };
402
403 util_dynarray_foreach(&input_files, char *, infile) {
404 int fd = open(*infile, O_RDONLY);
405 if (fd < 0) {
406 fprintf(stderr, "Failed to open %s\n", *infile);
407 ralloc_free(mem_ctx);
408 return 1;
409 }
410
411 off_t len = lseek(fd, 0, SEEK_END);
412 const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
413 close(fd);
414 if (map == MAP_FAILED) {
415 fprintf(stderr, "Failed to mmap the file: errno=%d, %s\n",
416 errno, strerror(errno));
417 ralloc_free(mem_ctx);
418 return 1;
419 }
420
421 const char *allowed_spirv_extensions[] = {
422 "SPV_EXT_shader_atomic_float_add",
423 "SPV_EXT_shader_atomic_float_min_max",
424 "SPV_KHR_float_controls",
425 "SPV_INTEL_subgroups",
426 NULL,
427 };
428
429 struct clc_compile_args clc_args = {
430 .source = {
431 .name = *infile,
432 .value = map,
433 },
434 .features = {
435 .fp16 = true,
436 .intel_subgroups = true,
437 .subgroups = true,
438 },
439 .args = util_dynarray_begin(&clang_args),
440 .num_args = util_dynarray_num_elements(&clang_args, char *),
441 .allowed_spirv_extensions = allowed_spirv_extensions,
442 };
443
444 struct clc_binary *spirv_out =
445 util_dynarray_grow(&spirv_objs, struct clc_binary, 1);
446
447 if (!clc_compile_c_to_spirv(&clc_args, &logger, spirv_out)) {
448 ralloc_free(mem_ctx);
449 return 1;
450 }
451 }
452
453 util_dynarray_foreach(&spirv_objs, struct clc_binary, p) {
454 util_dynarray_append(&spirv_ptr_objs, struct clc_binary *, p);
455 }
456
457 /* The SPIRV-Tools linker started checking that all modules have the same
458 * version. But SPIRV-LLVM-Translator picks the lower required version for
459 * each module it compiles. So we have to iterate over all of them and set
460 * the max found to make SPIRV-Tools link our modules.
461 *
462 * TODO: This is not the correct thing to do. We need SPIRV-LLVM-Translator
463 * to pick a given SPIRV version given to it and have all the modules
464 * at that version. We should remove this hack when this issue is
465 * fixed :
466 * https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1445
467 */
468 uint32_t max_spirv_version = 0;
469 util_dynarray_foreach(&spirv_ptr_objs, struct clc_binary *, module) {
470 max_spirv_version = MAX2(max_spirv_version,
471 get_module_spirv_version((*module)->data,
472 (*module)->size));
473 }
474
475 assert(max_spirv_version > 0);
476 util_dynarray_foreach(&spirv_ptr_objs, struct clc_binary *, module) {
477 set_module_spirv_version((*module)->data, (*module)->size,
478 max_spirv_version);
479 }
480
481
482 struct clc_linker_args link_args = {
483 .in_objs = util_dynarray_begin(&spirv_ptr_objs),
484 .num_in_objs = util_dynarray_num_elements(&spirv_ptr_objs,
485 struct clc_binary *),
486 .create_library = true,
487 };
488 struct clc_binary final_spirv;
489 if (!clc_link_spirv(&link_args, &logger, &final_spirv)) {
490 ralloc_free(mem_ctx);
491 return 1;
492 }
493
494 if (spv_outfile) {
495 FILE *fp = fopen(spv_outfile, "w");
496 fwrite(final_spirv.data, final_spirv.size, 1, fp);
497 fclose(fp);
498 }
499
500 struct clc_parsed_spirv parsed_spirv_data;
501 if (!clc_parse_spirv(&final_spirv, &logger, &parsed_spirv_data)) {
502 ralloc_free(mem_ctx);
503 return 1;
504 }
505
506 const struct clc_kernel_info *kernel_info = NULL;
507 for (unsigned i = 0; i < parsed_spirv_data.num_kernels; i++) {
508 if (strcmp(parsed_spirv_data.kernels[i].name, entry_point) == 0) {
509 kernel_info = &parsed_spirv_data.kernels[i];
510 break;
511 }
512 }
513 if (kernel_info == NULL) {
514 fprintf(stderr, "Kernel entrypoint %s not found\n", entry_point);
515 ralloc_free(mem_ctx);
516 return 1;
517 }
518
519 struct brw_kernel kernel = {};
520 char *error_str;
521
522 struct brw_compiler *compiler = brw_compiler_create(mem_ctx, devinfo);
523 compiler->shader_debug_log = compiler_log;
524 compiler->shader_perf_log = compiler_log;
525 struct disk_cache *disk_cache = get_disk_cache(compiler);
526
527 glsl_type_singleton_init_or_ref();
528
529 if (!brw_kernel_from_spirv(compiler, disk_cache, &kernel, NULL, mem_ctx,
530 final_spirv.data, final_spirv.size,
531 entry_point, &error_str)) {
532 fprintf(stderr, "Compile failed: %s\n", error_str);
533 ralloc_free(mem_ctx);
534 return 1;
535 }
536
537 if (print_info) {
538 fprintf(stdout, "kernel info:\n");
539 fprintf(stdout, " uses_barrier : %u\n", kernel.prog_data.uses_barrier);
540 fprintf(stdout, " uses_num_work_groups : %u\n", kernel.prog_data.uses_num_work_groups);
541 fprintf(stdout, " uses_inline_data : %u\n", kernel.prog_data.uses_inline_data);
542 fprintf(stdout, " local_size : %ux%ux%u\n",
543 kernel.prog_data.local_size[0],
544 kernel.prog_data.local_size[1],
545 kernel.prog_data.local_size[2]);
546 fprintf(stdout, " curb_read_length : %u\n", kernel.prog_data.base.curb_read_length);
547 fprintf(stdout, " total_scratch : %u\n", kernel.prog_data.base.total_scratch);
548 fprintf(stdout, " total_shared : %u\n", kernel.prog_data.base.total_shared);
549 fprintf(stdout, " program_size : %u\n", kernel.prog_data.base.program_size);
550 fprintf(stdout, " const_data_size : %u\n", kernel.prog_data.base.const_data_size);
551 fprintf(stdout, " uses_atomic_load_store : %u\n", kernel.prog_data.base.uses_atomic_load_store);
552 fprintf(stdout, " dispatch_grf_start_reg : %u\n", kernel.prog_data.base.dispatch_grf_start_reg);
553 }
554
555 glsl_type_singleton_decref();
556
557 char prefix_tmp[256];
558 if (prefix == NULL) {
559 bool is_pt_5 = (devinfo->verx10 % 10) == 5;
560 snprintf(prefix_tmp, sizeof(prefix_tmp), "gfx%d%s_clc_%s",
561 devinfo->ver, is_pt_5 ? "5" : "", entry_point);
562 prefix = prefix_tmp;
563 }
564
565 if (outfile != NULL) {
566 FILE *fp = fopen(outfile, "w");
567 print_kernel(fp, prefix, &kernel, isa);
568 fclose(fp);
569 } else {
570 print_kernel(stdout, prefix, &kernel, isa);
571 }
572
573 ralloc_free(mem_ctx);
574
575 return 0;
576 }
577