/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Faith Ekstrand (faith@gfxstrand.net)
 *
 */

#include "glsl_types.h"
#include "vtn_private.h"
#include "nir/nir_vla.h"
#include "nir/nir_control_flow.h"
#include "nir/nir_constant_expressions.h"
#include "nir/nir_deref.h"
#include "spirv_info.h"

#include "util/format/u_format.h"
#include "util/u_math.h"
#include "util/u_string.h"
#include "util/u_debug.h"

#include <stdio.h>

#ifndef NDEBUG
uint32_t mesa_spirv_debug = 0;

static const struct debug_named_value mesa_spirv_debug_control[] = {
   { "structured", MESA_SPIRV_DEBUG_STRUCTURED,
     "Print information of the SPIR-V structured control flow parsing" },
   DEBUG_NAMED_VALUE_END,
};

DEBUG_GET_ONCE_FLAGS_OPTION(mesa_spirv_debug, "MESA_SPIRV_DEBUG", mesa_spirv_debug_control, 0)

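/* Pick the default log level from the MESA_SPIRV_LOG_LEVEL environment
 * variable ("error", "warning" or "info"); unset or unrecognized values
 * fall back to warnings.
 */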
static enum nir_spirv_debug_level
vtn_default_log_level(void)
{
   enum nir_spirv_debug_level level = NIR_SPIRV_DEBUG_LEVEL_WARNING;
   const char *vtn_log_level_strings[] = {
      [NIR_SPIRV_DEBUG_LEVEL_WARNING] = "warning",
      [NIR_SPIRV_DEBUG_LEVEL_INFO] = "info",
      [NIR_SPIRV_DEBUG_LEVEL_ERROR] = "error",
   };
   const char *str = getenv("MESA_SPIRV_LOG_LEVEL");

   if (str == NULL)
      return NIR_SPIRV_DEBUG_LEVEL_WARNING;

   for (int i = 0; i < ARRAY_SIZE(vtn_log_level_strings); i++) {
      if (strcasecmp(str, vtn_log_level_strings[i]) == 0) {
         level = i;
         break;
      }
   }

   return level;
}
#endif

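/* Forward a log message to the caller-provided debug callback (if any) and,
 * in debug builds, echo it to stderr when it meets the default log level.
 */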
void
vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level,
        size_t spirv_offset, const char *message)
{
   if (b->options->debug.func) {
      b->options->debug.func(b->options->debug.private_data,
                             level, spirv_offset, message);
   }

#ifndef NDEBUG
   static enum nir_spirv_debug_level default_level =
      NIR_SPIRV_DEBUG_LEVEL_INVALID;

   if (default_level == NIR_SPIRV_DEBUG_LEVEL_INVALID)
      default_level = vtn_default_log_level();

   if (level >= default_level)
      fprintf(stderr, "%s\n", message);
#endif
}

void
vtn_logf(struct vtn_builder *b, enum nir_spirv_debug_level level,
         size_t spirv_offset, const char *fmt, ...)
{
   va_list args;
   char *msg;

   va_start(args, fmt);
   msg = ralloc_vasprintf(NULL, fmt, args);
   va_end(args);

   vtn_log(b, level, spirv_offset, msg);

   ralloc_free(msg);
}

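/* Format a diagnostic with the given prefix, the current SPIR-V byte offset,
 * and (when available) the OpLine source location, then hand it to vtn_log().
 */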
static void
vtn_log_err(struct vtn_builder *b,
            enum nir_spirv_debug_level level, const char *prefix,
            const char *file, unsigned line,
            const char *fmt, va_list args)
{
   char *msg;

   msg = ralloc_strdup(NULL, prefix);

#ifndef NDEBUG
   ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line);
#endif

   ralloc_asprintf_append(&msg, " ");

   ralloc_vasprintf_append(&msg, fmt, args);

   ralloc_asprintf_append(&msg, "\n %zu bytes into the SPIR-V binary",
                          b->spirv_offset);

   if (b->file) {
      ralloc_asprintf_append(&msg,
                             "\n in SPIR-V source file %s, line %d, col %d",
                             b->file, b->line, b->col);
   }

   vtn_log(b, level, b->spirv_offset, msg);

   ralloc_free(msg);
}

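/* Write the raw SPIR-V words to "<path>/<prefix>-<N>.spirv" so a failing
 * module can be captured and replayed offline.
 */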
static void
vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
{
   static int idx = 0;

   char filename[1024];
   int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv",
                      path, prefix, idx++);
   if (len < 0 || len >= sizeof(filename))
      return;

   FILE *f = fopen(filename, "wb");
   if (f == NULL)
      return;

   fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f);
   fclose(f);

   vtn_info("SPIR-V shader dumped to %s", filename);
}

void
_vtn_warn(struct vtn_builder *b, const char *file, unsigned line,
          const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_WARNING, "SPIR-V WARNING:\n",
               file, line, fmt, args);
   va_end(args);
}

void
_vtn_err(struct vtn_builder *b, const char *file, unsigned line,
         const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V ERROR:\n",
               file, line, fmt, args);
   va_end(args);
}

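/* Report an unrecoverable parsing error, optionally dump the binary to
 * MESA_SPIRV_FAIL_DUMP_PATH, and longjmp back to the top-level entry point;
 * this function does not return to its caller.
 */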
void
_vtn_fail(struct vtn_builder *b, const char *file, unsigned line,
          const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V parsing FAILED:\n",
               file, line, fmt, args);
   va_end(args);

   const char *dump_path = secure_getenv("MESA_SPIRV_FAIL_DUMP_PATH");
   if (dump_path)
      vtn_dump_shader(b, dump_path, "fail");

#ifndef NDEBUG
   if (!b->options->skip_os_break_in_debug_build)
      os_break();
#endif

   vtn_longjmp(b->fail_jump, 1);
}

const char *
vtn_value_type_to_string(enum vtn_value_type t)
{
#define CASE(typ) case vtn_value_type_##typ: return #typ
   switch (t) {
   CASE(invalid);
   CASE(undef);
   CASE(string);
   CASE(decoration_group);
   CASE(type);
   CASE(constant);
   CASE(pointer);
   CASE(function);
   CASE(block);
   CASE(ssa);
   CASE(extension);
   CASE(image_pointer);
   }
#undef CASE
   unreachable("unknown value type");
   return "UNKNOWN";
}

void
_vtn_fail_value_type_mismatch(struct vtn_builder *b, uint32_t value_id,
                              enum vtn_value_type value_type)
{
   struct vtn_value *val = vtn_untyped_value(b, value_id);
   vtn_fail(
      "SPIR-V id %u is the wrong kind of value: "
      "expected '%s' but got '%s'",
      vtn_id_for_value(b, val),
      vtn_value_type_to_string(value_type),
      vtn_value_type_to_string(val->value_type));
}

void _vtn_fail_value_not_pointer(struct vtn_builder *b,
                                 uint32_t value_id)
{
   struct vtn_value *val = vtn_untyped_value(b, value_id);
   vtn_fail("SPIR-V id %u is the wrong kind of value: "
            "expected 'pointer' OR null constant but got "
            "'%s' (%s)", value_id,
            vtn_value_type_to_string(val->value_type),
            val->is_null_constant ? "null constant" : "not null constant");
}

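/* Build a vtn_ssa_value whose leaves are NIR undefs, recursing through
 * arrays, matrices and structs; cooperative-matrix types get a fresh
 * temporary variable instead.
 */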
static struct vtn_ssa_value *
vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
{
   struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
   val->type = glsl_get_bare_type(type);

   if (glsl_type_is_cmat(type)) {
      nir_deref_instr *mat = vtn_create_cmat_temporary(b, type, "cmat_undef");
      vtn_set_ssa_value_var(b, val, mat->var);
   } else if (glsl_type_is_vector_or_scalar(type)) {
      unsigned num_components = glsl_get_vector_elements(val->type);
      unsigned bit_size = glsl_get_bit_size(val->type);
      val->def = nir_undef(&b->nb, num_components, bit_size);
   } else {
      unsigned elems = glsl_get_length(val->type);
      val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
      if (glsl_type_is_array_or_matrix(type)) {
         const struct glsl_type *elem_type = glsl_get_array_element(type);
         for (unsigned i = 0; i < elems; i++)
            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
      } else {
         vtn_assert(glsl_type_is_struct_or_ifc(type));
         for (unsigned i = 0; i < elems; i++) {
            const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
         }
      }
   }

   return val;
}

struct vtn_ssa_value *
vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
                    const struct glsl_type *type)
{
   struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
   val->type = glsl_get_bare_type(type);

   if (glsl_type_is_cmat(type)) {
      const struct glsl_type *element_type = glsl_get_cmat_element(type);

      nir_deref_instr *mat = vtn_create_cmat_temporary(b, type, "cmat_constant");
      nir_cmat_construct(&b->nb, &mat->def,
                         nir_build_imm(&b->nb, 1, glsl_get_bit_size(element_type),
                                       constant->values));
      vtn_set_ssa_value_var(b, val, mat->var);
   } else if (glsl_type_is_vector_or_scalar(type)) {
      val->def = nir_build_imm(&b->nb, glsl_get_vector_elements(val->type),
                               glsl_get_bit_size(val->type),
                               constant->values);
   } else {
      unsigned elems = glsl_get_length(val->type);
      val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
      if (glsl_type_is_array_or_matrix(type)) {
         const struct glsl_type *elem_type = glsl_get_array_element(type);
         for (unsigned i = 0; i < elems; i++) {
            val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
                                                elem_type);
         }
      } else {
         vtn_assert(glsl_type_is_struct_or_ifc(type));
         for (unsigned i = 0; i < elems; i++) {
            const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
            val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
                                                elem_type);
         }
      }
   }

   return val;
}

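/* Materialize the vtn_ssa_value for a SPIR-V id: undefs and constants are
 * lowered on demand, pointers are converted to an SSA representation, and
 * plain SSA values are returned directly.
 */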
struct vtn_ssa_value *
vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_value *val = vtn_untyped_value(b, value_id);
   switch (val->value_type) {
   case vtn_value_type_undef:
      return vtn_undef_ssa_value(b, val->type->type);

   case vtn_value_type_constant:
      return vtn_const_ssa_value(b, val->constant, val->type->type);

   case vtn_value_type_ssa:
      return val->ssa;

   case vtn_value_type_pointer:
      vtn_assert(val->pointer->ptr_type && val->pointer->ptr_type->type);
      struct vtn_ssa_value *ssa =
         vtn_create_ssa_value(b, val->pointer->ptr_type->type);
      ssa->def = vtn_pointer_to_ssa(b, val->pointer);
      return ssa;

   default:
      vtn_fail("Invalid type for an SSA value");
   }
}

struct vtn_value *
vtn_push_ssa_value(struct vtn_builder *b, uint32_t value_id,
                   struct vtn_ssa_value *ssa)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);

   /* See vtn_create_ssa_value */
   vtn_fail_if(ssa->type != glsl_get_bare_type(type->type),
               "Type mismatch for SPIR-V value %%%u", value_id);

   struct vtn_value *val;
   if (type->base_type == vtn_base_type_pointer) {
      val = vtn_push_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type));
   } else {
      /* Don't trip the value_type_ssa check in vtn_push_value */
      val = vtn_push_value(b, value_id, vtn_value_type_invalid);
      val->value_type = vtn_value_type_ssa;
      val->ssa = ssa;
   }

   return val;
}

nir_def *
vtn_get_nir_ssa(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_ssa_value *ssa = vtn_ssa_value(b, value_id);
   vtn_fail_if(!glsl_type_is_vector_or_scalar(ssa->type),
               "Expected a vector or scalar type");
   return ssa->def;
}

struct vtn_value *
vtn_push_nir_ssa(struct vtn_builder *b, uint32_t value_id, nir_def *def)
{
   /* Types for all SPIR-V SSA values are set as part of a pre-pass so the
    * type will be valid by the time we get here.
    */
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_fail_if(def->num_components != glsl_get_vector_elements(type->type) ||
               def->bit_size != glsl_get_bit_size(type->type),
               "Mismatch between NIR and SPIR-V type.");
   struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
   ssa->def = def;
   return vtn_push_ssa_value(b, value_id, ssa);
}

nir_deref_instr *
vtn_get_deref_for_id(struct vtn_builder *b, uint32_t value_id)
{
   return vtn_get_deref_for_ssa_value(b, vtn_ssa_value(b, value_id));
}

nir_deref_instr *
vtn_get_deref_for_ssa_value(struct vtn_builder *b, struct vtn_ssa_value *ssa)
{
   vtn_fail_if(!ssa->is_variable, "Expected an SSA value with a nir_variable");
   return nir_build_deref_var(&b->nb, ssa->var);
}

struct vtn_value *
vtn_push_var_ssa(struct vtn_builder *b, uint32_t value_id, nir_variable *var)
{
   struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, var->type);
   vtn_set_ssa_value_var(b, ssa, var);
   return vtn_push_ssa_value(b, value_id, ssa);
}

static enum gl_access_qualifier
spirv_to_gl_access_qualifier(struct vtn_builder *b,
                             SpvAccessQualifier access_qualifier)
{
   switch (access_qualifier) {
   case SpvAccessQualifierReadOnly:
      return ACCESS_NON_WRITEABLE;
   case SpvAccessQualifierWriteOnly:
      return ACCESS_NON_READABLE;
   case SpvAccessQualifierReadWrite:
      return 0;
   default:
      vtn_fail("Invalid image access qualifier");
   }
}

static nir_deref_instr *
vtn_get_image(struct vtn_builder *b, uint32_t value_id,
              enum gl_access_qualifier *access)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_image);
   if (access)
      *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier);
   nir_variable_mode mode = glsl_type_is_image(type->glsl_image) ?
                            nir_var_image : nir_var_uniform;
   return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
                               mode, type->glsl_image, 0);
}

static void
vtn_push_image(struct vtn_builder *b, uint32_t value_id,
               nir_deref_instr *deref, bool propagate_non_uniform)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_image);
   struct vtn_value *value = vtn_push_nir_ssa(b, value_id, &deref->def);
   value->propagated_non_uniform = propagate_non_uniform;
}

static nir_deref_instr *
vtn_get_sampler(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_sampler);
   return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
                               nir_var_uniform, glsl_bare_sampler_type(), 0);
}

nir_def *
vtn_sampled_image_to_nir_ssa(struct vtn_builder *b,
                             struct vtn_sampled_image si)
{
   return nir_vec2(&b->nb, &si.image->def, &si.sampler->def);
}

static void
vtn_push_sampled_image(struct vtn_builder *b, uint32_t value_id,
                       struct vtn_sampled_image si, bool propagate_non_uniform)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_sampled_image);
   struct vtn_value *value = vtn_push_nir_ssa(b, value_id,
                                              vtn_sampled_image_to_nir_ssa(b, si));
   value->propagated_non_uniform = propagate_non_uniform;
}

static struct vtn_sampled_image
vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_sampled_image);
   nir_def *si_vec2 = vtn_get_nir_ssa(b, value_id);

   /* Even though this is a sampled image, we can end up here with a storage
    * image because OpenCL doesn't distinguish between the two.
    */
   const struct glsl_type *image_type = type->image->glsl_image;
   nir_variable_mode image_mode = glsl_type_is_image(image_type) ?
                                  nir_var_image : nir_var_uniform;

   struct vtn_sampled_image si = { NULL, };
   si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0),
                                   image_mode, image_type, 0);
   si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1),
                                     nir_var_uniform,
                                     glsl_bare_sampler_type(), 0);
   return si;
}

const char *
vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
                   unsigned word_count, unsigned *words_used)
{
   /* From the SPIR-V spec:
    *
    *    "A string is interpreted as a nul-terminated stream of characters.
    *    The character set is Unicode in the UTF-8 encoding scheme. The UTF-8
    *    octets (8-bit bytes) are packed four per word, following the
    *    little-endian convention (i.e., the first octet is in the
    *    lowest-order 8 bits of the word). The final word contains the
    *    string’s nul-termination character (0), and all contents past the
    *    end of the string in the final word are padded with 0."
    *
    * On big-endian, we need to byte-swap.
    */
#if UTIL_ARCH_BIG_ENDIAN
   {
      uint32_t *copy = vtn_alloc_array(b, uint32_t, word_count);
      for (unsigned i = 0; i < word_count; i++)
         copy[i] = util_bswap32(words[i]);
      words = copy;
   }
#endif

   const char *str = (const char *)words;
   const char *end = memchr(str, 0, word_count * 4);
   vtn_fail_if(end == NULL, "String is not null-terminated");

   if (words_used)
      *words_used = DIV_ROUND_UP(end - str + 1, sizeof(*words));

   return str;
}

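/* Walk the SPIR-V instruction stream between start and end, keeping the
 * OpLine/OpNoLine source location and the byte offset up to date, and call
 * the handler for every other opcode.  Iteration stops early (returning the
 * current word pointer) as soon as the handler returns false.
 */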
const uint32_t *
vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
                        const uint32_t *end, vtn_instruction_handler handler)
{
   b->file = NULL;
   b->line = -1;
   b->col = -1;

   const uint32_t *w = start;
   while (w < end) {
      SpvOp opcode = w[0] & SpvOpCodeMask;
      unsigned count = w[0] >> SpvWordCountShift;
      vtn_assert(count >= 1 && w + count <= end);

      b->spirv_offset = (uint8_t *)w - (uint8_t *)b->spirv;

      switch (opcode) {
      case SpvOpNop:
         break; /* Do nothing */

      case SpvOpLine:
         b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
         b->line = w[2];
         b->col = w[3];
         break;

      case SpvOpNoLine:
         b->file = NULL;
         b->line = -1;
         b->col = -1;
         break;

      default:
         if (!handler(b, opcode, w, count))
            return w;
         break;
      }

      w += count;
   }

   b->spirv_offset = 0;
   b->file = NULL;
   b->line = -1;
   b->col = -1;

   assert(w == end);
   return w;
}

static bool
vtn_handle_non_semantic_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                    const uint32_t *w, unsigned count)
{
   /* Do nothing. */
   return true;
}

static void
vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
                     const uint32_t *w, unsigned count)
{
   switch (opcode) {
   case SpvOpExtInstImport: {
      struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
      const char *ext = vtn_string_literal(b, &w[2], count - 2, NULL);
      if (strcmp(ext, "GLSL.std.450") == 0) {
         val->ext_handler = vtn_handle_glsl450_instruction;
      } else if ((strcmp(ext, "SPV_AMD_gcn_shader") == 0)
                 && (b->options && b->options->caps.amd_gcn_shader)) {
         val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
      } else if ((strcmp(ext, "SPV_AMD_shader_ballot") == 0)
                 && (b->options && b->options->caps.amd_shader_ballot)) {
         val->ext_handler = vtn_handle_amd_shader_ballot_instruction;
      } else if ((strcmp(ext, "SPV_AMD_shader_trinary_minmax") == 0)
                 && (b->options && b->options->caps.amd_trinary_minmax)) {
         val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
      } else if ((strcmp(ext, "SPV_AMD_shader_explicit_vertex_parameter") == 0)
                 && (b->options && b->options->caps.amd_shader_explicit_vertex_parameter)) {
         val->ext_handler = vtn_handle_amd_shader_explicit_vertex_parameter_instruction;
      } else if (strcmp(ext, "OpenCL.std") == 0) {
         val->ext_handler = vtn_handle_opencl_instruction;
      } else if (strstr(ext, "NonSemantic.") == ext) {
         val->ext_handler = vtn_handle_non_semantic_instruction;
      } else {
         vtn_fail("Unsupported extension: %s", ext);
      }
      break;
   }

   case SpvOpExtInst: {
      struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
      bool handled = val->ext_handler(b, w[4], w, count);
      vtn_assert(handled);
      break;
   }

   default:
      vtn_fail_with_opcode("Unhandled opcode", opcode);
   }
}

static void
_foreach_decoration_helper(struct vtn_builder *b,
                           struct vtn_value *base_value,
                           int parent_member,
                           struct vtn_value *value,
                           vtn_decoration_foreach_cb cb, void *data)
{
   for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
      int member;
      if (dec->scope == VTN_DEC_DECORATION) {
         member = parent_member;
      } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
         vtn_fail_if(value->value_type != vtn_value_type_type ||
                     value->type->base_type != vtn_base_type_struct,
                     "OpMemberDecorate and OpGroupMemberDecorate are only "
                     "allowed on OpTypeStruct");
         /* This means we haven't recursed yet */
         assert(value == base_value);

         member = dec->scope - VTN_DEC_STRUCT_MEMBER0;

         vtn_fail_if(member >= base_value->type->length,
                     "OpMemberDecorate specifies member %d but the "
                     "OpTypeStruct has only %u members",
                     member, base_value->type->length);
      } else {
         /* Not a decoration */
         assert(dec->scope == VTN_DEC_EXECUTION_MODE ||
                dec->scope <= VTN_DEC_STRUCT_MEMBER_NAME0);
         continue;
      }

      if (dec->group) {
         assert(dec->group->value_type == vtn_value_type_decoration_group);
         _foreach_decoration_helper(b, base_value, member, dec->group,
                                    cb, data);
      } else {
         cb(b, base_value, member, dec, data);
      }
   }
}

/** Iterates (recursively if needed) over all of the decorations on a value
 *
 * This function iterates over all of the decorations applied to a given
 * value.  If it encounters a decoration group, it recurses into the group
 * and iterates over all of those decorations as well.
 */
void
vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
                       vtn_decoration_foreach_cb cb, void *data)
{
   _foreach_decoration_helper(b, value, -1, value, cb, data);
}

void
vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
                           vtn_execution_mode_foreach_cb cb, void *data)
{
   for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
      if (dec->scope != VTN_DEC_EXECUTION_MODE)
         continue;

      assert(dec->group == NULL);
      cb(b, value, dec, data);
   }
}

void
vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
                      const uint32_t *w, unsigned count)
{
   const uint32_t *w_end = w + count;
   const uint32_t target = w[1];
   w += 2;

   switch (opcode) {
   case SpvOpDecorationGroup:
      vtn_push_value(b, target, vtn_value_type_decoration_group);
      break;

   case SpvOpDecorate:
   case SpvOpDecorateId:
   case SpvOpMemberDecorate:
   case SpvOpDecorateString:
   case SpvOpMemberDecorateString:
   case SpvOpExecutionMode:
   case SpvOpExecutionModeId: {
      struct vtn_value *val = vtn_untyped_value(b, target);

      struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
      switch (opcode) {
      case SpvOpDecorate:
      case SpvOpDecorateId:
      case SpvOpDecorateString:
         dec->scope = VTN_DEC_DECORATION;
         break;
      case SpvOpMemberDecorate:
      case SpvOpMemberDecorateString:
         dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
         vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */
                     "Member argument of OpMemberDecorate too large");
         break;
      case SpvOpExecutionMode:
      case SpvOpExecutionModeId:
         dec->scope = VTN_DEC_EXECUTION_MODE;
         break;
      default:
         unreachable("Invalid decoration opcode");
      }
      dec->decoration = *(w++);
      dec->num_operands = w_end - w;
      dec->operands = w;

      /* Link into the list */
      dec->next = val->decoration;
      val->decoration = dec;
      break;
   }

   case SpvOpMemberName: {
      struct vtn_value *val = vtn_untyped_value(b, target);
      struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);

      dec->scope = VTN_DEC_STRUCT_MEMBER_NAME0 - *(w++);

      dec->member_name = vtn_string_literal(b, w, w_end - w, NULL);

      dec->next = val->decoration;
      val->decoration = dec;
      break;
   }

   case SpvOpGroupMemberDecorate:
   case SpvOpGroupDecorate: {
      struct vtn_value *group =
         vtn_value(b, target, vtn_value_type_decoration_group);

      for (; w < w_end; w++) {
         struct vtn_value *val = vtn_untyped_value(b, *w);
         struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);

         dec->group = group;
         if (opcode == SpvOpGroupDecorate) {
            dec->scope = VTN_DEC_DECORATION;
         } else {
            dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
            vtn_fail_if(dec->scope < 0, /* Check for overflow */
                        "Member argument of OpGroupMemberDecorate too large");
         }

         /* Link into the list */
         dec->next = val->decoration;
         val->decoration = dec;
      }
      break;
   }

   default:
      unreachable("Unhandled opcode");
   }
}

struct member_decoration_ctx {
   unsigned num_fields;
   struct glsl_struct_field *fields;
   struct vtn_type *type;
};

/**
 * Returns true if the given type contains a struct decorated Block or
 * BufferBlock
 */
bool
vtn_type_contains_block(struct vtn_builder *b, struct vtn_type *type)
{
   switch (type->base_type) {
   case vtn_base_type_array:
      return vtn_type_contains_block(b, type->array_element);
   case vtn_base_type_struct:
      if (type->block || type->buffer_block)
         return true;
      for (unsigned i = 0; i < type->length; i++) {
         if (vtn_type_contains_block(b, type->members[i]))
            return true;
      }
      return false;
   default:
      return false;
   }
}

/** Returns true if two types are "compatible", i.e. you can do an OpLoad,
 * OpStore, or OpCopyMemory between them without breaking anything.
 * Technically, the SPIR-V rules require the exact same type ID but this lets
 * us internally be a bit looser.
 */
bool
vtn_types_compatible(struct vtn_builder *b,
                     struct vtn_type *t1, struct vtn_type *t2)
{
   if (t1->id == t2->id)
      return true;

   if (t1->base_type != t2->base_type)
      return false;

   switch (t1->base_type) {
   case vtn_base_type_void:
   case vtn_base_type_scalar:
   case vtn_base_type_vector:
   case vtn_base_type_matrix:
   case vtn_base_type_image:
   case vtn_base_type_sampler:
   case vtn_base_type_sampled_image:
   case vtn_base_type_event:
   case vtn_base_type_cooperative_matrix:
      return t1->type == t2->type;

   case vtn_base_type_array:
      return t1->length == t2->length &&
             vtn_types_compatible(b, t1->array_element, t2->array_element);

   case vtn_base_type_pointer:
      return vtn_types_compatible(b, t1->deref, t2->deref);

   case vtn_base_type_struct:
      if (t1->length != t2->length)
         return false;

      for (unsigned i = 0; i < t1->length; i++) {
         if (!vtn_types_compatible(b, t1->members[i], t2->members[i]))
            return false;
      }
      return true;

   case vtn_base_type_accel_struct:
   case vtn_base_type_ray_query:
      return true;

   case vtn_base_type_function:
      /* This case shouldn't get hit since you can't copy around function
       * types.  Just require them to be identical.
       */
      return false;
   }

   vtn_fail("Invalid base type");
}

struct vtn_type *
vtn_type_without_array(struct vtn_type *type)
{
   while (type->base_type == vtn_base_type_array)
      type = type->array_element;
   return type;
}

/* Does a shallow copy of a vtn_type.  For struct and function types, the
 * member/offset and parameter arrays are re-allocated so the copy can be
 * modified without affecting the original.
 */

static struct vtn_type *
vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
{
   struct vtn_type *dest = vtn_alloc(b, struct vtn_type);
   *dest = *src;

   switch (src->base_type) {
   case vtn_base_type_void:
   case vtn_base_type_scalar:
   case vtn_base_type_vector:
   case vtn_base_type_matrix:
   case vtn_base_type_array:
   case vtn_base_type_pointer:
   case vtn_base_type_image:
   case vtn_base_type_sampler:
   case vtn_base_type_sampled_image:
   case vtn_base_type_event:
   case vtn_base_type_accel_struct:
   case vtn_base_type_ray_query:
   case vtn_base_type_cooperative_matrix:
      /* Nothing more to do */
      break;

   case vtn_base_type_struct:
      dest->members = vtn_alloc_array(b, struct vtn_type *, src->length);
      memcpy(dest->members, src->members,
             src->length * sizeof(src->members[0]));

      dest->offsets = vtn_alloc_array(b, unsigned, src->length);
      memcpy(dest->offsets, src->offsets,
             src->length * sizeof(src->offsets[0]));
      break;

   case vtn_base_type_function:
      dest->params = vtn_alloc_array(b, struct vtn_type *, src->length);
      memcpy(dest->params, src->params, src->length * sizeof(src->params[0]));
      break;
   }

   return dest;
}

static bool
vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type,
                               enum vtn_variable_mode mode)
{
   /* For OpenCL we never want to strip the info from the types, and it makes
    * type comparisons easier in later stages.
    */
   if (b->options->environment == NIR_SPIRV_OPENCL)
      return true;

   switch (mode) {
   case vtn_variable_mode_input:
   case vtn_variable_mode_output:
      /* Layout decorations kept because we need offsets for XFB arrays of
       * blocks.
       */
      return b->shader->info.has_transform_feedback_varyings;

   case vtn_variable_mode_ssbo:
   case vtn_variable_mode_phys_ssbo:
   case vtn_variable_mode_ubo:
   case vtn_variable_mode_push_constant:
   case vtn_variable_mode_shader_record:
      return true;

   case vtn_variable_mode_workgroup:
      return b->options->caps.workgroup_memory_explicit_layout;

   default:
      return false;
   }
}

const struct glsl_type *
vtn_type_get_nir_type(struct vtn_builder *b, struct vtn_type *type,
                      enum vtn_variable_mode mode)
{
   if (mode == vtn_variable_mode_atomic_counter) {
      vtn_fail_if(glsl_without_array(type->type) != glsl_uint_type(),
                  "Variables in the AtomicCounter storage class should be "
                  "(possibly arrays of arrays of) uint.");
      return glsl_type_wrap_in_arrays(glsl_atomic_uint_type(), type->type);
   }

   if (mode == vtn_variable_mode_uniform) {
      switch (type->base_type) {
      case vtn_base_type_array: {
         const struct glsl_type *elem_type =
            vtn_type_get_nir_type(b, type->array_element, mode);

         return glsl_array_type(elem_type, type->length,
                                glsl_get_explicit_stride(type->type));
      }

      case vtn_base_type_struct: {
         bool need_new_struct = false;
         const uint32_t num_fields = type->length;
         NIR_VLA(struct glsl_struct_field, fields, num_fields);
         for (unsigned i = 0; i < num_fields; i++) {
            fields[i] = *glsl_get_struct_field_data(type->type, i);
            const struct glsl_type *field_nir_type =
               vtn_type_get_nir_type(b, type->members[i], mode);
            if (fields[i].type != field_nir_type) {
               fields[i].type = field_nir_type;
               need_new_struct = true;
            }
         }
         if (need_new_struct) {
            if (glsl_type_is_interface(type->type)) {
               return glsl_interface_type(fields, num_fields,
                                          /* packing */ 0, false,
                                          glsl_get_type_name(type->type));
            } else {
               return glsl_struct_type(fields, num_fields,
                                       glsl_get_type_name(type->type),
                                       glsl_struct_type_is_packed(type->type));
            }
         } else {
            /* No changes, just pass it on */
            return type->type;
         }
      }

      case vtn_base_type_image:
         vtn_assert(glsl_type_is_texture(type->glsl_image));
         return type->glsl_image;

      case vtn_base_type_sampler:
         return glsl_bare_sampler_type();

      case vtn_base_type_sampled_image:
         return glsl_texture_type_to_sampler(type->image->glsl_image,
                                             false /* is_shadow */);

      default:
         return type->type;
      }
   }

   if (mode == vtn_variable_mode_image) {
      struct vtn_type *image_type = vtn_type_without_array(type);
      vtn_assert(image_type->base_type == vtn_base_type_image);
      return glsl_type_wrap_in_arrays(image_type->glsl_image, type->type);
   }

   /* Layout decorations are allowed but ignored in certain conditions, to
    * allow SPIR-V generators to perform type deduplication.  Discard the
    * unnecessary ones when passing the type on to NIR.
    */
   if (!vtn_type_needs_explicit_layout(b, type, mode))
      return glsl_get_bare_type(type->type);

   return type->type;
}

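/* Copy-on-write helper: duplicate the given struct member (and any array
 * wrappers around it) so the matrix type at the tail can be decorated
 * without affecting other users of the shared vtn_type.
 */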
static struct vtn_type *
mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
{
   type->members[member] = vtn_type_copy(b, type->members[member]);
   type = type->members[member];

   /* We may have an array of matrices.... Oh, joy! */
   while (glsl_type_is_array(type->type)) {
      type->array_element = vtn_type_copy(b, type->array_element);
      type = type->array_element;
   }

   vtn_assert(glsl_type_is_matrix(type->type));

   return type;
}

static void
vtn_handle_access_qualifier(struct vtn_builder *b, struct vtn_type *type,
                            int member, enum gl_access_qualifier access)
{
   type->members[member] = vtn_type_copy(b, type->members[member]);
   type = type->members[member];

   type->access |= access;
}

static void
array_stride_decoration_cb(struct vtn_builder *b,
                           struct vtn_value *val, int member,
                           const struct vtn_decoration *dec, void *void_ctx)
{
   struct vtn_type *type = val->type;

   if (dec->decoration == SpvDecorationArrayStride) {
      if (vtn_type_contains_block(b, type)) {
         vtn_warn("The ArrayStride decoration cannot be applied to an array "
                  "type which contains a structure type decorated Block "
                  "or BufferBlock");
         /* Ignore the decoration */
      } else {
         vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero");
         type->stride = dec->operands[0];
      }
   }
}

static void
struct_member_decoration_cb(struct vtn_builder *b,
                            UNUSED struct vtn_value *val, int member,
                            const struct vtn_decoration *dec, void *void_ctx)
{
   struct member_decoration_ctx *ctx = void_ctx;

   if (member < 0)
      return;

   assert(member < ctx->num_fields);

   switch (dec->decoration) {
   case SpvDecorationRelaxedPrecision:
   case SpvDecorationUniform:
   case SpvDecorationUniformId:
      break; /* FIXME: Do nothing with this for now. */
   case SpvDecorationNonWritable:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_WRITEABLE);
      break;
   case SpvDecorationNonReadable:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_READABLE);
      break;
   case SpvDecorationVolatile:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_VOLATILE);
      break;
   case SpvDecorationCoherent:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_COHERENT);
      break;
   case SpvDecorationNoPerspective:
      ctx->fields[member].interpolation = INTERP_MODE_NOPERSPECTIVE;
      break;
   case SpvDecorationFlat:
      ctx->fields[member].interpolation = INTERP_MODE_FLAT;
      break;
   case SpvDecorationExplicitInterpAMD:
      ctx->fields[member].interpolation = INTERP_MODE_EXPLICIT;
      break;
   case SpvDecorationCentroid:
      ctx->fields[member].centroid = true;
      break;
   case SpvDecorationSample:
      ctx->fields[member].sample = true;
      break;
   case SpvDecorationStream:
      /* This is handled later by var_decoration_cb in vtn_variables.c */
      break;
   case SpvDecorationLocation:
      ctx->fields[member].location = dec->operands[0];
      break;
   case SpvDecorationComponent:
      break; /* FIXME: What should we do with these? */
   case SpvDecorationBuiltIn:
      ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
      ctx->type->members[member]->is_builtin = true;
      ctx->type->members[member]->builtin = dec->operands[0];
      ctx->type->builtin_block = true;
      break;
   case SpvDecorationOffset:
      ctx->type->offsets[member] = dec->operands[0];
      ctx->fields[member].offset = dec->operands[0];
      break;
   case SpvDecorationMatrixStride:
      /* Handled as a second pass */
      break;
   case SpvDecorationColMajor:
      break; /* Nothing to do here.  Column-major is the default. */
   case SpvDecorationRowMajor:
      mutable_matrix_member(b, ctx->type, member)->row_major = true;
      break;

   case SpvDecorationPatch:
   case SpvDecorationPerPrimitiveNV:
   case SpvDecorationPerTaskNV:
   case SpvDecorationPerViewNV:
      break;

   case SpvDecorationSpecId:
   case SpvDecorationBlock:
   case SpvDecorationBufferBlock:
   case SpvDecorationArrayStride:
   case SpvDecorationGLSLShared:
   case SpvDecorationGLSLPacked:
   case SpvDecorationAliased:
   case SpvDecorationConstant:
   case SpvDecorationIndex:
   case SpvDecorationBinding:
   case SpvDecorationDescriptorSet:
   case SpvDecorationLinkageAttributes:
   case SpvDecorationNoContraction:
   case SpvDecorationInputAttachmentIndex:
   case SpvDecorationCPacked:
      vtn_warn("Decoration not allowed on struct members: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationRestrict:
      /* While "Restrict" is invalid for struct members, glslang incorrectly
       * generates it and it ends up hiding actual driver issues in a wall of
       * spam from deqp-vk.  Return it to the above block once the issue is
       * resolved.  https://github.com/KhronosGroup/glslang/issues/703
       */
      break;

   case SpvDecorationInvariant:
      /* Also incorrectly generated by glslang, ignore it. */
      break;

   case SpvDecorationXfbBuffer:
   case SpvDecorationXfbStride:
      /* This is handled later by var_decoration_cb in vtn_variables.c */
      break;

   case SpvDecorationSaturatedConversion:
   case SpvDecorationFuncParamAttr:
   case SpvDecorationFPRoundingMode:
   case SpvDecorationFPFastMathMode:
   case SpvDecorationAlignment:
      if (b->shader->info.stage != MESA_SHADER_KERNEL) {
         vtn_warn("Decoration only allowed for CL-style kernels: %s",
                  spirv_decoration_to_string(dec->decoration));
      }
      break;

   case SpvDecorationUserSemantic:
   case SpvDecorationUserTypeGOOGLE:
      /* User semantic decorations can safely be ignored by the driver. */
      break;

   default:
      vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
   }
}

/** Chases the array type all the way down to the tail and rewrites the
 * glsl_types to be based off the tail's glsl_type.
 */
static void
vtn_array_type_rewrite_glsl_type(struct vtn_type *type)
{
   if (type->base_type != vtn_base_type_array)
      return;

   vtn_array_type_rewrite_glsl_type(type->array_element);

   type->type = glsl_array_type(type->array_element->type,
                                type->length, type->stride);
}

/* Matrix strides are handled as a separate pass because we need to know
 * whether the matrix is row-major or not first.
 */
static void
struct_member_matrix_stride_cb(struct vtn_builder *b,
                               UNUSED struct vtn_value *val, int member,
                               const struct vtn_decoration *dec,
                               void *void_ctx)
{
   if (dec->decoration != SpvDecorationMatrixStride)
      return;

   vtn_fail_if(member < 0,
               "The MatrixStride decoration is only allowed on members "
               "of OpTypeStruct");
   vtn_fail_if(dec->operands[0] == 0, "MatrixStride must be non-zero");

   struct member_decoration_ctx *ctx = void_ctx;

   struct vtn_type *mat_type = mutable_matrix_member(b, ctx->type, member);
   if (mat_type->row_major) {
      mat_type->array_element = vtn_type_copy(b, mat_type->array_element);
      mat_type->stride = mat_type->array_element->stride;
      mat_type->array_element->stride = dec->operands[0];

      mat_type->type = glsl_explicit_matrix_type(mat_type->type,
                                                 dec->operands[0], true);
      mat_type->array_element->type = glsl_get_column_type(mat_type->type);
   } else {
      vtn_assert(mat_type->array_element->stride > 0);
      mat_type->stride = dec->operands[0];

      mat_type->type = glsl_explicit_matrix_type(mat_type->type,
                                                 dec->operands[0], false);
   }

   /* Now that we've replaced the glsl_type with a properly strided matrix
    * type, rewrite the member type so that it's an array of the proper kind
    * of glsl_type.
    */
   vtn_array_type_rewrite_glsl_type(ctx->type->members[member]);
   ctx->fields[member].type = ctx->type->members[member]->type;
}

static void
struct_packed_decoration_cb(struct vtn_builder *b,
                            struct vtn_value *val, int member,
                            const struct vtn_decoration *dec, void *void_ctx)
{
   vtn_assert(val->type->base_type == vtn_base_type_struct);
   if (dec->decoration == SpvDecorationCPacked) {
      if (b->shader->info.stage != MESA_SHADER_KERNEL) {
         vtn_warn("Decoration only allowed for CL-style kernels: %s",
                  spirv_decoration_to_string(dec->decoration));
      }
      val->type->packed = true;
   }
}

static void
struct_block_decoration_cb(struct vtn_builder *b,
                           struct vtn_value *val, int member,
                           const struct vtn_decoration *dec, void *ctx)
{
   if (member != -1)
      return;

   struct vtn_type *type = val->type;
   if (dec->decoration == SpvDecorationBlock)
      type->block = true;
   else if (dec->decoration == SpvDecorationBufferBlock)
      type->buffer_block = true;
}

static void
type_decoration_cb(struct vtn_builder *b,
                   struct vtn_value *val, int member,
                   const struct vtn_decoration *dec, UNUSED void *ctx)
{
   struct vtn_type *type = val->type;

   if (member != -1) {
      /* This should have been handled by OpTypeStruct */
      assert(val->type->base_type == vtn_base_type_struct);
      assert(member >= 0 && member < val->type->length);
      return;
   }

   switch (dec->decoration) {
   case SpvDecorationArrayStride:
      vtn_assert(type->base_type == vtn_base_type_array ||
                 type->base_type == vtn_base_type_pointer);
      break;
   case SpvDecorationBlock:
      vtn_assert(type->base_type == vtn_base_type_struct);
      vtn_assert(type->block);
      break;
   case SpvDecorationBufferBlock:
      vtn_assert(type->base_type == vtn_base_type_struct);
      vtn_assert(type->buffer_block);
      break;
   case SpvDecorationGLSLShared:
   case SpvDecorationGLSLPacked:
      /* Ignore these, since we get explicit offsets anyways */
      break;

   case SpvDecorationRowMajor:
   case SpvDecorationColMajor:
   case SpvDecorationMatrixStride:
   case SpvDecorationBuiltIn:
   case SpvDecorationNoPerspective:
   case SpvDecorationFlat:
   case SpvDecorationPatch:
   case SpvDecorationCentroid:
   case SpvDecorationSample:
   case SpvDecorationExplicitInterpAMD:
   case SpvDecorationVolatile:
   case SpvDecorationCoherent:
   case SpvDecorationNonWritable:
   case SpvDecorationNonReadable:
   case SpvDecorationUniform:
   case SpvDecorationUniformId:
   case SpvDecorationLocation:
   case SpvDecorationComponent:
   case SpvDecorationOffset:
   case SpvDecorationXfbBuffer:
   case SpvDecorationXfbStride:
   case SpvDecorationUserSemantic:
      vtn_warn("Decoration only allowed for struct members: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationStream:
      /* We don't need to do anything here: stream is filled in when applying
       * the decoration to a variable.  Just check that, when it is not on a
       * struct member, the decorated type is a struct.
       */
      vtn_assert(type->base_type == vtn_base_type_struct);
      break;

   case SpvDecorationRelaxedPrecision:
   case SpvDecorationSpecId:
   case SpvDecorationInvariant:
   case SpvDecorationRestrict:
   case SpvDecorationAliased:
   case SpvDecorationConstant:
   case SpvDecorationIndex:
   case SpvDecorationBinding:
   case SpvDecorationDescriptorSet:
   case SpvDecorationLinkageAttributes:
   case SpvDecorationNoContraction:
   case SpvDecorationInputAttachmentIndex:
      vtn_warn("Decoration not allowed on types: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationCPacked:
      /* Handled when parsing a struct type, nothing to do here. */
      break;

   case SpvDecorationSaturatedConversion:
   case SpvDecorationFuncParamAttr:
   case SpvDecorationFPRoundingMode:
   case SpvDecorationFPFastMathMode:
   case SpvDecorationAlignment:
      vtn_warn("Decoration only allowed for CL-style kernels: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationUserTypeGOOGLE:
      /* User semantic decorations can safely be ignored by the driver. */
      break;

   default:
      vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
   }
}

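/* Map a SPIR-V image format onto the corresponding Gallium PIPE_FORMAT;
 * unknown formats are a hard parse failure.
 */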
static unsigned
translate_image_format(struct vtn_builder *b, SpvImageFormat format)
{
   switch (format) {
   case SpvImageFormatUnknown:      return PIPE_FORMAT_NONE;
   case SpvImageFormatRgba32f:      return PIPE_FORMAT_R32G32B32A32_FLOAT;
   case SpvImageFormatRgba16f:      return PIPE_FORMAT_R16G16B16A16_FLOAT;
   case SpvImageFormatR32f:         return PIPE_FORMAT_R32_FLOAT;
   case SpvImageFormatRgba8:        return PIPE_FORMAT_R8G8B8A8_UNORM;
   case SpvImageFormatRgba8Snorm:   return PIPE_FORMAT_R8G8B8A8_SNORM;
   case SpvImageFormatRg32f:        return PIPE_FORMAT_R32G32_FLOAT;
   case SpvImageFormatRg16f:        return PIPE_FORMAT_R16G16_FLOAT;
   case SpvImageFormatR11fG11fB10f: return PIPE_FORMAT_R11G11B10_FLOAT;
   case SpvImageFormatR16f:         return PIPE_FORMAT_R16_FLOAT;
   case SpvImageFormatRgba16:       return PIPE_FORMAT_R16G16B16A16_UNORM;
   case SpvImageFormatRgb10A2:      return PIPE_FORMAT_R10G10B10A2_UNORM;
   case SpvImageFormatRg16:         return PIPE_FORMAT_R16G16_UNORM;
   case SpvImageFormatRg8:          return PIPE_FORMAT_R8G8_UNORM;
   case SpvImageFormatR16:          return PIPE_FORMAT_R16_UNORM;
   case SpvImageFormatR8:           return PIPE_FORMAT_R8_UNORM;
   case SpvImageFormatRgba16Snorm:  return PIPE_FORMAT_R16G16B16A16_SNORM;
   case SpvImageFormatRg16Snorm:    return PIPE_FORMAT_R16G16_SNORM;
   case SpvImageFormatRg8Snorm:     return PIPE_FORMAT_R8G8_SNORM;
   case SpvImageFormatR16Snorm:     return PIPE_FORMAT_R16_SNORM;
   case SpvImageFormatR8Snorm:      return PIPE_FORMAT_R8_SNORM;
   case SpvImageFormatRgba32i:      return PIPE_FORMAT_R32G32B32A32_SINT;
   case SpvImageFormatRgba16i:      return PIPE_FORMAT_R16G16B16A16_SINT;
   case SpvImageFormatRgba8i:       return PIPE_FORMAT_R8G8B8A8_SINT;
   case SpvImageFormatR32i:         return PIPE_FORMAT_R32_SINT;
   case SpvImageFormatRg32i:        return PIPE_FORMAT_R32G32_SINT;
   case SpvImageFormatRg16i:        return PIPE_FORMAT_R16G16_SINT;
   case SpvImageFormatRg8i:         return PIPE_FORMAT_R8G8_SINT;
   case SpvImageFormatR16i:         return PIPE_FORMAT_R16_SINT;
   case SpvImageFormatR8i:          return PIPE_FORMAT_R8_SINT;
   case SpvImageFormatRgba32ui:     return PIPE_FORMAT_R32G32B32A32_UINT;
   case SpvImageFormatRgba16ui:     return PIPE_FORMAT_R16G16B16A16_UINT;
   case SpvImageFormatRgba8ui:      return PIPE_FORMAT_R8G8B8A8_UINT;
   case SpvImageFormatR32ui:        return PIPE_FORMAT_R32_UINT;
   case SpvImageFormatRgb10a2ui:    return PIPE_FORMAT_R10G10B10A2_UINT;
   case SpvImageFormatRg32ui:       return PIPE_FORMAT_R32G32_UINT;
   case SpvImageFormatRg16ui:       return PIPE_FORMAT_R16G16_UINT;
   case SpvImageFormatRg8ui:        return PIPE_FORMAT_R8G8_UINT;
   case SpvImageFormatR16ui:        return PIPE_FORMAT_R16_UINT;
   case SpvImageFormatR8ui:         return PIPE_FORMAT_R8_UINT;
   case SpvImageFormatR64ui:        return PIPE_FORMAT_R64_UINT;
   case SpvImageFormatR64i:         return PIPE_FORMAT_R64_SINT;
   default:
      vtn_fail("Invalid image format: %s (%u)",
               spirv_imageformat_to_string(format), format);
   }
}

static void
validate_image_type_for_sampled_image(struct vtn_builder *b,
                                      const struct glsl_type *image_type,
                                      const char *operand)
{
   /* From OpTypeSampledImage description in SPIR-V 1.6, revision 1:
    *
    *    Image Type must be an OpTypeImage. It is the type of the image in the
    *    combined sampler and image type. It must not have a Dim of
    *    SubpassData. Additionally, starting with version 1.6, it must not have
    *    a Dim of Buffer.
    *
    * Same also applies to the type of the Image operand in OpSampledImage.
    */

   const enum glsl_sampler_dim dim = glsl_get_sampler_dim(image_type);

   vtn_fail_if(dim == GLSL_SAMPLER_DIM_SUBPASS ||
               dim == GLSL_SAMPLER_DIM_SUBPASS_MS,
               "%s must not have a Dim of SubpassData.", operand);

   if (dim == GLSL_SAMPLER_DIM_BUF) {
      if (b->version >= 0x10600) {
         vtn_fail("Starting with SPIR-V 1.6, %s "
                  "must not have a Dim of Buffer.", operand);
      } else {
         vtn_warn("%s should not have a Dim of Buffer.", operand);
      }
   }
}

1531 static void
vtn_handle_type(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)1532 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
1533 const uint32_t *w, unsigned count)
1534 {
1535 struct vtn_value *val = NULL;
1536
1537 /* In order to properly handle forward declarations, we have to defer
1538 * allocation for pointer types.
1539 */
1540 if (opcode != SpvOpTypePointer && opcode != SpvOpTypeForwardPointer) {
1541 val = vtn_push_value(b, w[1], vtn_value_type_type);
1542 vtn_fail_if(val->type != NULL,
1543 "Only pointers can have forward declarations");
1544 val->type = vtn_zalloc(b, struct vtn_type);
1545 val->type->id = w[1];
1546 }
1547
1548 switch (opcode) {
1549 case SpvOpTypeVoid:
1550 val->type->base_type = vtn_base_type_void;
1551 val->type->type = glsl_void_type();
1552 break;
1553 case SpvOpTypeBool:
1554 val->type->base_type = vtn_base_type_scalar;
1555 val->type->type = glsl_bool_type();
1556 val->type->length = 1;
1557 break;
1558 case SpvOpTypeInt: {
1559 int bit_size = w[2];
1560 const bool signedness = w[3];
1561 vtn_fail_if(bit_size != 8 && bit_size != 16 &&
1562 bit_size != 32 && bit_size != 64,
1563 "Invalid int bit size: %u", bit_size);
1564 val->type->base_type = vtn_base_type_scalar;
1565 val->type->type = signedness ? glsl_intN_t_type(bit_size) :
1566 glsl_uintN_t_type(bit_size);
1567 val->type->length = 1;
1568 break;
1569 }
1570
1571 case SpvOpTypeFloat: {
1572 int bit_size = w[2];
1573 val->type->base_type = vtn_base_type_scalar;
1574 vtn_fail_if(bit_size != 16 && bit_size != 32 && bit_size != 64,
1575 "Invalid float bit size: %u", bit_size);
1576 val->type->type = glsl_floatN_t_type(bit_size);
1577 val->type->length = 1;
1578 break;
1579 }
1580
1581 case SpvOpTypeVector: {
1582 struct vtn_type *base = vtn_get_type(b, w[2]);
1583 unsigned elems = w[3];
1584
1585 vtn_fail_if(base->base_type != vtn_base_type_scalar,
1586 "Base type for OpTypeVector must be a scalar");
1587 vtn_fail_if((elems < 2 || elems > 4) && (elems != 8) && (elems != 16),
1588 "Invalid component count for OpTypeVector");
1589
1590 val->type->base_type = vtn_base_type_vector;
1591 val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
1592 val->type->length = elems;
1593 val->type->stride = glsl_type_is_boolean(val->type->type)
1594 ? 4 : glsl_get_bit_size(base->type) / 8;
1595 val->type->array_element = base;
1596 break;
1597 }
1598
1599 case SpvOpTypeMatrix: {
1600 struct vtn_type *base = vtn_get_type(b, w[2]);
1601 unsigned columns = w[3];
1602
1603 vtn_fail_if(base->base_type != vtn_base_type_vector,
1604 "Base type for OpTypeMatrix must be a vector");
1605 vtn_fail_if(columns < 2 || columns > 4,
1606 "Invalid column count for OpTypeMatrix");
1607
1608 val->type->base_type = vtn_base_type_matrix;
1609 val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
1610 glsl_get_vector_elements(base->type),
1611 columns);
1612 vtn_fail_if(glsl_type_is_error(val->type->type),
1613 "Unsupported base type for OpTypeMatrix");
1614 assert(!glsl_type_is_error(val->type->type));
1615 val->type->length = columns;
1616 val->type->array_element = base;
1617 val->type->row_major = false;
1618 val->type->stride = 0;
1619 break;
1620 }
1621
1622 case SpvOpTypeRuntimeArray:
1623 case SpvOpTypeArray: {
1624 struct vtn_type *array_element = vtn_get_type(b, w[2]);
1625
1626 if (opcode == SpvOpTypeRuntimeArray) {
1627 /* A length of 0 is used to denote unsized arrays */
1628 val->type->length = 0;
1629 } else {
1630 val->type->length = vtn_constant_uint(b, w[3]);
1631 }
1632
1633 val->type->base_type = vtn_base_type_array;
1634 val->type->array_element = array_element;
1635
1636 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1637 val->type->type = glsl_array_type(array_element->type, val->type->length,
1638 val->type->stride);
1639 break;
1640 }
1641
1642 case SpvOpTypeStruct: {
1643 unsigned num_fields = count - 2;
1644 val->type->base_type = vtn_base_type_struct;
1645 val->type->length = num_fields;
1646 val->type->members = vtn_alloc_array(b, struct vtn_type *, num_fields);
1647 val->type->offsets = vtn_alloc_array(b, unsigned, num_fields);
1648 val->type->packed = false;
1649
1650 NIR_VLA(struct glsl_struct_field, fields, count);
1651 for (unsigned i = 0; i < num_fields; i++) {
1652 val->type->members[i] = vtn_get_type(b, w[i + 2]);
1653 const char *name = NULL;
1654 for (struct vtn_decoration *dec = val->decoration; dec; dec = dec->next) {
1655 if (dec->scope == VTN_DEC_STRUCT_MEMBER_NAME0 - i) {
1656 name = dec->member_name;
1657 break;
1658 }
1659 }
1660 if (!name)
1661 name = ralloc_asprintf(b, "field%d", i);
1662
1663 fields[i] = (struct glsl_struct_field) {
1664 .type = val->type->members[i]->type,
1665 .name = name,
1666 .location = -1,
1667 .offset = -1,
1668 };
1669 }
1670
1671 vtn_foreach_decoration(b, val, struct_packed_decoration_cb, NULL);
1672
1673 struct member_decoration_ctx ctx = {
1674 .num_fields = num_fields,
1675 .fields = fields,
1676 .type = val->type
1677 };
1678
1679 vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
1680
1681 /* Propagate access specifiers that are present on all members to the overall type */
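      /* e.g. a struct whose members are all decorated NonWritable is itself
       * treated as non-writable; any member lacking a qualifier clears the
       * corresponding bit for the whole type.
       */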
1682 enum gl_access_qualifier overall_access = ACCESS_COHERENT | ACCESS_VOLATILE |
1683 ACCESS_NON_READABLE | ACCESS_NON_WRITEABLE;
1684 for (unsigned i = 0; i < num_fields; ++i)
1685 overall_access &= val->type->members[i]->access;
1686 val->type->access = overall_access;
1687
1688 vtn_foreach_decoration(b, val, struct_member_matrix_stride_cb, &ctx);
1689
1690 vtn_foreach_decoration(b, val, struct_block_decoration_cb, NULL);
1691
1692 const char *name = val->name;
1693
1694 if (val->type->block || val->type->buffer_block) {
1695 /* Packing will be ignored since types coming from SPIR-V are
1696 * explicitly laid out.
1697 */
1698 val->type->type = glsl_interface_type(fields, num_fields,
1699 /* packing */ 0, false,
1700 name ? name : "block");
1701 } else {
1702 val->type->type = glsl_struct_type(fields, num_fields,
1703 name ? name : "struct",
1704 val->type->packed);
1705 }
1706 break;
1707 }
1708
1709 case SpvOpTypeFunction: {
1710 val->type->base_type = vtn_base_type_function;
1711 val->type->type = NULL;
1712
1713 val->type->return_type = vtn_get_type(b, w[2]);
1714
1715 const unsigned num_params = count - 3;
1716 val->type->length = num_params;
1717 val->type->params = vtn_alloc_array(b, struct vtn_type *, num_params);
1718 for (unsigned i = 0; i < count - 3; i++) {
1719 val->type->params[i] = vtn_get_type(b, w[i + 3]);
1720 }
1721 break;
1722 }
1723
1724 case SpvOpTypePointer:
1725 case SpvOpTypeForwardPointer: {
1726 /* We can't blindly push the value because it might be a forward
1727 * declaration.
1728 */
1729 val = vtn_untyped_value(b, w[1]);
1730
1731 SpvStorageClass storage_class = w[2];
1732
1733 vtn_fail_if(opcode == SpvOpTypeForwardPointer &&
1734 b->shader->info.stage != MESA_SHADER_KERNEL &&
1735 storage_class != SpvStorageClassPhysicalStorageBuffer,
1736 "OpTypeForwardPointer is only allowed in Vulkan with "
1737 "the PhysicalStorageBuffer storage class");
1738
1739 struct vtn_type *deref_type = NULL;
1740 if (opcode == SpvOpTypePointer)
1741 deref_type = vtn_get_type(b, w[3]);
1742
1743 bool has_forward_pointer = false;
1744 if (val->value_type == vtn_value_type_invalid) {
1745 val->value_type = vtn_value_type_type;
1746 val->type = vtn_zalloc(b, struct vtn_type);
1747 val->type->id = w[1];
1748 val->type->base_type = vtn_base_type_pointer;
1749 val->type->storage_class = storage_class;
1750
1751 /* These can actually be stored to nir_variables and used as SSA
1752 * values so they need a real glsl_type.
1753 */
1754 enum vtn_variable_mode mode = vtn_storage_class_to_mode(
1755 b, storage_class, deref_type, NULL);
1756
1757 /* The deref type should only matter for the UniformConstant storage
1758 * class. In particular, it should never matter for any storage
1759 * classes that are allowed in combination with OpTypeForwardPointer.
1760 */
1761 if (storage_class != SpvStorageClassUniform &&
1762 storage_class != SpvStorageClassUniformConstant) {
1763 assert(mode == vtn_storage_class_to_mode(b, storage_class,
1764 NULL, NULL));
1765 }
1766
1767 val->type->type = nir_address_format_to_glsl_type(
1768 vtn_mode_to_address_format(b, mode));
1769 } else {
1770 vtn_fail_if(val->type->storage_class != storage_class,
1771 "The storage classes of an OpTypePointer and any "
1772 "OpTypeForwardPointers that provide forward "
1773 "declarations of it must match.");
1774 has_forward_pointer = true;
1775 }
1776
1777 if (opcode == SpvOpTypePointer) {
1778 vtn_fail_if(val->type->deref != NULL,
1779 "While OpTypeForwardPointer can be used to provide a "
1780 "forward declaration of a pointer, OpTypePointer can "
1781 "only be used once for a given id.");
1782
1783 vtn_fail_if(has_forward_pointer &&
1784 deref_type->base_type != vtn_base_type_struct,
1785 "An OpTypePointer instruction must declare "
1786 "Pointer Type to be a pointer to an OpTypeStruct.");
1787
1788 val->type->deref = deref_type;
1789
1790 /* Only certain storage classes use ArrayStride. */
1791 switch (storage_class) {
1792 case SpvStorageClassWorkgroup:
1793 if (!b->options->caps.workgroup_memory_explicit_layout)
1794 break;
1795 FALLTHROUGH;
1796
1797 case SpvStorageClassUniform:
1798 case SpvStorageClassPushConstant:
1799 case SpvStorageClassStorageBuffer:
1800 case SpvStorageClassPhysicalStorageBuffer:
1801 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1802 break;
1803
1804 default:
1805 /* Nothing to do. */
1806 break;
1807 }
1808 }
1809 break;
1810 }
1811
1812 case SpvOpTypeImage: {
1813 val->type->base_type = vtn_base_type_image;
1814
1815 /* Images are represented in NIR as a scalar SSA value that is the
1816 * result of a deref instruction. An OpLoad on an OpTypeImage pointer
1817 * from UniformConstant memory just takes the NIR deref from the pointer
1818 * and turns it into an SSA value.
1819 */
1820 val->type->type = nir_address_format_to_glsl_type(
1821 vtn_mode_to_address_format(b, vtn_variable_mode_function));
1822
1823 const struct vtn_type *sampled_type = vtn_get_type(b, w[2]);
1824 if (b->shader->info.stage == MESA_SHADER_KERNEL) {
1825 vtn_fail_if(sampled_type->base_type != vtn_base_type_void,
1826 "Sampled type of OpTypeImage must be void for kernels");
1827 } else {
1828 vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar,
1829 "Sampled type of OpTypeImage must be a scalar");
1830 if (b->options->caps.image_atomic_int64) {
1831 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32 &&
1832 glsl_get_bit_size(sampled_type->type) != 64,
1833 "Sampled type of OpTypeImage must be a 32 or 64-bit "
1834 "scalar");
1835 } else {
1836 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32,
1837 "Sampled type of OpTypeImage must be a 32-bit scalar");
1838 }
1839 }
1840
1841 enum glsl_sampler_dim dim;
1842 switch ((SpvDim)w[3]) {
1843 case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break;
1844 case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break;
1845 case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break;
1846 case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break;
1847 case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break;
1848 case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break;
1849 case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break;
1850 default:
1851 vtn_fail("Invalid SPIR-V image dimensionality: %s (%u)",
1852 spirv_dim_to_string((SpvDim)w[3]), w[3]);
1853 }
1854
1855 /* w[4]: as per Vulkan spec "Validation Rules within a Module",
1856 * The “Depth” operand of OpTypeImage is ignored.
1857 */
1858 bool is_array = w[5];
1859 bool multisampled = w[6];
1860 unsigned sampled = w[7];
1861 SpvImageFormat format = w[8];
1862
1863 if (count > 9)
1864 val->type->access_qualifier = w[9];
1865 else if (b->shader->info.stage == MESA_SHADER_KERNEL)
1866 /* Per the CL C spec: If no qualifier is provided, read_only is assumed. */
1867 val->type->access_qualifier = SpvAccessQualifierReadOnly;
1868 else
1869 val->type->access_qualifier = SpvAccessQualifierReadWrite;
1870
1871 if (multisampled) {
1872 if (dim == GLSL_SAMPLER_DIM_2D)
1873 dim = GLSL_SAMPLER_DIM_MS;
1874 else if (dim == GLSL_SAMPLER_DIM_SUBPASS)
1875 dim = GLSL_SAMPLER_DIM_SUBPASS_MS;
1876 else
1877 vtn_fail("Unsupported multisampled image type");
1878 }
1879
1880 val->type->image_format = translate_image_format(b, format);
1881
1882 enum glsl_base_type sampled_base_type =
1883 glsl_get_base_type(sampled_type->type);
1884 if (sampled == 1) {
1885 val->type->glsl_image = glsl_texture_type(dim, is_array,
1886 sampled_base_type);
1887 } else if (sampled == 2) {
1888 val->type->glsl_image = glsl_image_type(dim, is_array,
1889 sampled_base_type);
1890 } else if (b->shader->info.stage == MESA_SHADER_KERNEL) {
1891 val->type->glsl_image = glsl_image_type(dim, is_array,
1892 GLSL_TYPE_VOID);
1893 } else {
1894 vtn_fail("We need to know if the image will be sampled");
1895 }
1896 break;
1897 }
1898
1899 case SpvOpTypeSampledImage: {
1900 val->type->base_type = vtn_base_type_sampled_image;
1901 val->type->image = vtn_get_type(b, w[2]);
1902
1903 validate_image_type_for_sampled_image(
1904 b, val->type->image->glsl_image,
1905 "Image Type operand of OpTypeSampledImage");
1906
1907      /* Sampled images are represented in NIR as a vec2 SSA value where each
1908 * component is the result of a deref instruction. The first component
1909 * is the image and the second is the sampler. An OpLoad on an
1910 * OpTypeSampledImage pointer from UniformConstant memory just takes
1911 * the NIR deref from the pointer and duplicates it to both vector
1912 * components.
1913 */
1914 nir_address_format addr_format =
1915 vtn_mode_to_address_format(b, vtn_variable_mode_function);
1916 assert(nir_address_format_num_components(addr_format) == 1);
1917 unsigned bit_size = nir_address_format_bit_size(addr_format);
1918 assert(bit_size == 32 || bit_size == 64);
1919
1920 enum glsl_base_type base_type =
1921 bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64;
1922 val->type->type = glsl_vector_type(base_type, 2);
1923 break;
1924 }
1925
1926 case SpvOpTypeSampler:
1927 val->type->base_type = vtn_base_type_sampler;
1928
1929 /* Samplers are represented in NIR as a scalar SSA value that is the
1930 * result of a deref instruction. An OpLoad on an OpTypeSampler pointer
1931 * from UniformConstant memory just takes the NIR deref from the pointer
1932 * and turns it into an SSA value.
1933 */
1934 val->type->type = nir_address_format_to_glsl_type(
1935 vtn_mode_to_address_format(b, vtn_variable_mode_function));
1936 break;
1937
1938 case SpvOpTypeAccelerationStructureKHR:
1939 val->type->base_type = vtn_base_type_accel_struct;
1940 val->type->type = glsl_uint64_t_type();
1941 break;
1942
1943
1944 case SpvOpTypeOpaque: {
1945 val->type->base_type = vtn_base_type_struct;
1946 const char *name = vtn_string_literal(b, &w[2], count - 2, NULL);
1947 val->type->type = glsl_struct_type(NULL, 0, name, false);
1948 break;
1949 }
1950
1951 case SpvOpTypeRayQueryKHR: {
1952 val->type->base_type = vtn_base_type_ray_query;
1953 val->type->type = glsl_uint64_t_type();
1954 /* We may need to run queries on helper invocations. Here the parser
1955 * doesn't go through a deeper analysis on whether the result of a query
1956 * will be used in derivative instructions.
1957 *
1958 * An implementation willing to optimize this would look through the IR
1959 * and check if any derivative instruction uses the result of a query
1960 * and drop this flag if not.
1961 */
1962 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
1963 val->type->access = ACCESS_INCLUDE_HELPERS;
1964 break;
1965 }
1966
1967 case SpvOpTypeCooperativeMatrixKHR:
1968 vtn_handle_cooperative_type(b, val, opcode, w, count);
1969 break;
1970
1971 case SpvOpTypeEvent:
1972 val->type->base_type = vtn_base_type_event;
1973      /*
1974       * Make the event type the same size as a pointer to work around LLVM 16.
1975       * LLVM 17 fixes this properly; with LLVM 16 and opaque pointers it is still wrong.
1976       */
1977 val->type->type = b->shader->info.cs.ptr_size == 64 ? glsl_int64_t_type() : glsl_int_type();
1978 break;
1979
1980 case SpvOpTypeDeviceEvent:
1981 case SpvOpTypeReserveId:
1982 case SpvOpTypeQueue:
1983 case SpvOpTypePipe:
1984 default:
1985 vtn_fail_with_opcode("Unhandled opcode", opcode);
1986 }
1987
1988 vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
1989
1990 if (val->type->base_type == vtn_base_type_struct &&
1991 (val->type->block || val->type->buffer_block)) {
1992 for (unsigned i = 0; i < val->type->length; i++) {
1993 vtn_fail_if(vtn_type_contains_block(b, val->type->members[i]),
1994 "Block and BufferBlock decorations cannot decorate a "
1995 "structure type that is nested at any level inside "
1996 "another structure type decorated with Block or "
1997 "BufferBlock.");
1998 }
1999 }
2000 }
2001
2002 static nir_constant *
2003 vtn_null_constant(struct vtn_builder *b, struct vtn_type *type)
2004 {
2005 nir_constant *c = rzalloc(b, nir_constant);
2006
2007 switch (type->base_type) {
2008 case vtn_base_type_scalar:
2009 case vtn_base_type_vector:
2010 c->is_null_constant = true;
2011 /* Nothing to do here. It's already initialized to zero */
2012 break;
2013
2014 case vtn_base_type_pointer: {
2015 enum vtn_variable_mode mode = vtn_storage_class_to_mode(
2016 b, type->storage_class, type->deref, NULL);
2017 nir_address_format addr_format = vtn_mode_to_address_format(b, mode);
2018
2019 const nir_const_value *null_value = nir_address_format_null_value(addr_format);
2020 memcpy(c->values, null_value,
2021 sizeof(nir_const_value) * nir_address_format_num_components(addr_format));
2022 break;
2023 }
2024
2025 case vtn_base_type_void:
2026 case vtn_base_type_image:
2027 case vtn_base_type_sampler:
2028 case vtn_base_type_sampled_image:
2029 case vtn_base_type_function:
2030 case vtn_base_type_event:
2031 /* For those we have to return something but it doesn't matter what. */
2032 break;
2033
2034 case vtn_base_type_matrix:
2035 case vtn_base_type_array:
2036 vtn_assert(type->length > 0);
2037 c->is_null_constant = true;
2038 c->num_elements = type->length;
2039 c->elements = ralloc_array(b, nir_constant *, c->num_elements);
2040
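      /* Every element of a null array or matrix is identical, so build the
       * null element once and share it across all elements.
       */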
2041 c->elements[0] = vtn_null_constant(b, type->array_element);
2042 for (unsigned i = 1; i < c->num_elements; i++)
2043 c->elements[i] = c->elements[0];
2044 break;
2045
2046 case vtn_base_type_struct:
2047 c->is_null_constant = true;
2048 c->num_elements = type->length;
2049 c->elements = ralloc_array(b, nir_constant *, c->num_elements);
2050 for (unsigned i = 0; i < c->num_elements; i++)
2051 c->elements[i] = vtn_null_constant(b, type->members[i]);
2052 break;
2053
2054 default:
2055 vtn_fail("Invalid type for null constant");
2056 }
2057
2058 return c;
2059 }
2060
2061 static void
2062 spec_constant_decoration_cb(struct vtn_builder *b, UNUSED struct vtn_value *val,
2063 ASSERTED int member,
2064 const struct vtn_decoration *dec, void *data)
2065 {
2066 vtn_assert(member == -1);
2067 if (dec->decoration != SpvDecorationSpecId)
2068 return;
2069
2070 nir_const_value *value = data;
2071 for (unsigned i = 0; i < b->num_specializations; i++) {
2072 if (b->specializations[i].id == dec->operands[0]) {
2073 *value = b->specializations[i].value;
2074 return;
2075 }
2076 }
2077 }
2078
2079 static void
2080 handle_workgroup_size_decoration_cb(struct vtn_builder *b,
2081 struct vtn_value *val,
2082 ASSERTED int member,
2083 const struct vtn_decoration *dec,
2084 UNUSED void *data)
2085 {
2086 vtn_assert(member == -1);
2087 if (dec->decoration != SpvDecorationBuiltIn ||
2088 dec->operands[0] != SpvBuiltInWorkgroupSize)
2089 return;
2090
2091 vtn_assert(val->type->type == glsl_vector_type(GLSL_TYPE_UINT, 3));
2092 b->workgroup_size_builtin = val;
2093 }
2094
2095 static void
2096 vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
2097 const uint32_t *w, unsigned count)
2098 {
2099 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
2100 val->constant = rzalloc(b, nir_constant);
2101 switch (opcode) {
2102 case SpvOpConstantTrue:
2103 case SpvOpConstantFalse:
2104 case SpvOpSpecConstantTrue:
2105 case SpvOpSpecConstantFalse: {
2106 vtn_fail_if(val->type->type != glsl_bool_type(),
2107 "Result type of %s must be OpTypeBool",
2108 spirv_op_to_string(opcode));
2109
2110 bool bval = (opcode == SpvOpConstantTrue ||
2111 opcode == SpvOpSpecConstantTrue);
2112
2113 nir_const_value u32val = nir_const_value_for_uint(bval, 32);
2114
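      /* Boolean specializations are supplied as 32-bit scalars; any non-zero
       * specialization value selects true.
       */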
2115 if (opcode == SpvOpSpecConstantTrue ||
2116 opcode == SpvOpSpecConstantFalse)
2117 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32val);
2118
2119 val->constant->values[0].b = u32val.u32 != 0;
2120 break;
2121 }
2122
2123 case SpvOpConstant:
2124 case SpvOpSpecConstant: {
2125 vtn_fail_if(val->type->base_type != vtn_base_type_scalar,
2126 "Result type of %s must be a scalar",
2127 spirv_op_to_string(opcode));
2128 int bit_size = glsl_get_bit_size(val->type->type);
2129 switch (bit_size) {
2130 case 64:
2131 val->constant->values[0].u64 = vtn_u64_literal(&w[3]);
2132 break;
2133 case 32:
2134 val->constant->values[0].u32 = w[3];
2135 break;
2136 case 16:
2137 val->constant->values[0].u16 = w[3];
2138 break;
2139 case 8:
2140 val->constant->values[0].u8 = w[3];
2141 break;
2142 default:
2143 vtn_fail("Unsupported SpvOpConstant bit size: %u", bit_size);
2144 }
2145
2146 if (opcode == SpvOpSpecConstant)
2147 vtn_foreach_decoration(b, val, spec_constant_decoration_cb,
2148 &val->constant->values[0]);
2149 break;
2150 }
2151
2152 case SpvOpSpecConstantComposite:
2153 case SpvOpConstantComposite: {
2154 unsigned elem_count = count - 3;
2155 unsigned expected_length = val->type->base_type == vtn_base_type_cooperative_matrix ?
2156 1 : val->type->length;
2157 vtn_fail_if(elem_count != expected_length,
2158 "%s has %u constituents, expected %u",
2159 spirv_op_to_string(opcode), elem_count, expected_length);
2160
2161 nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
2162 val->is_undef_constant = true;
2163 for (unsigned i = 0; i < elem_count; i++) {
2164 struct vtn_value *elem_val = vtn_untyped_value(b, w[i + 3]);
2165
2166 if (elem_val->value_type == vtn_value_type_constant) {
2167 elems[i] = elem_val->constant;
2168 val->is_undef_constant = val->is_undef_constant &&
2169 elem_val->is_undef_constant;
2170 } else {
2171 vtn_fail_if(elem_val->value_type != vtn_value_type_undef,
2172 "only constants or undefs allowed for "
2173 "SpvOpConstantComposite");
2174 /* to make it easier, just insert a NULL constant for now */
2175 elems[i] = vtn_null_constant(b, elem_val->type);
2176 }
2177 }
2178
2179 switch (val->type->base_type) {
2180 case vtn_base_type_vector: {
2181 assert(glsl_type_is_vector(val->type->type));
2182 for (unsigned i = 0; i < elem_count; i++)
2183 val->constant->values[i] = elems[i]->values[0];
2184 break;
2185 }
2186
2187 case vtn_base_type_matrix:
2188 case vtn_base_type_struct:
2189 case vtn_base_type_array:
2190 ralloc_steal(val->constant, elems);
2191 val->constant->num_elements = elem_count;
2192 val->constant->elements = elems;
2193 break;
2194
2195 case vtn_base_type_cooperative_matrix:
2196 val->constant->values[0] = elems[0]->values[0];
2197 break;
2198
2199 default:
2200 vtn_fail("Result type of %s must be a composite type",
2201 spirv_op_to_string(opcode));
2202 }
2203 break;
2204 }
2205
2206 case SpvOpSpecConstantOp: {
2207 nir_const_value u32op = nir_const_value_for_uint(w[3], 32);
2208 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32op);
2209 SpvOp opcode = u32op.u32;
2210 switch (opcode) {
2211 case SpvOpVectorShuffle: {
2212 struct vtn_value *v0 = &b->values[w[4]];
2213 struct vtn_value *v1 = &b->values[w[5]];
2214
2215 vtn_assert(v0->value_type == vtn_value_type_constant ||
2216 v0->value_type == vtn_value_type_undef);
2217 vtn_assert(v1->value_type == vtn_value_type_constant ||
2218 v1->value_type == vtn_value_type_undef);
2219
2220 unsigned len0 = glsl_get_vector_elements(v0->type->type);
2221 unsigned len1 = glsl_get_vector_elements(v1->type->type);
2222
2223 vtn_assert(len0 + len1 < 16);
2224
2225 unsigned bit_size = glsl_get_bit_size(val->type->type);
2226 unsigned bit_size0 = glsl_get_bit_size(v0->type->type);
2227 unsigned bit_size1 = glsl_get_bit_size(v1->type->type);
2228
2229 vtn_assert(bit_size == bit_size0 && bit_size == bit_size1);
2230 (void)bit_size0; (void)bit_size1;
2231
2232 nir_const_value undef = { .u64 = 0xdeadbeefdeadbeef };
2233 nir_const_value combined[NIR_MAX_VEC_COMPONENTS * 2];
2234
2235 if (v0->value_type == vtn_value_type_constant) {
2236 for (unsigned i = 0; i < len0; i++)
2237 combined[i] = v0->constant->values[i];
2238 }
2239 if (v1->value_type == vtn_value_type_constant) {
2240 for (unsigned i = 0; i < len1; i++)
2241 combined[len0 + i] = v1->constant->values[i];
2242 }
2243
2244 for (unsigned i = 0, j = 0; i < count - 6; i++, j++) {
2245 uint32_t comp = w[i + 6];
2246 if (comp == (uint32_t)-1) {
2247 /* If component is not used, set the value to a known constant
2248 * to detect if it is wrongly used.
2249 */
2250 val->constant->values[j] = undef;
2251 } else {
2252 vtn_fail_if(comp >= len0 + len1,
2253 "All Component literals must either be FFFFFFFF "
2254 "or in [0, N - 1] (inclusive).");
2255 val->constant->values[j] = combined[comp];
2256 }
2257 }
2258 break;
2259 }
2260
2261 case SpvOpCompositeExtract:
2262 case SpvOpCompositeInsert: {
2263 struct vtn_value *comp;
2264 unsigned deref_start;
2265 struct nir_constant **c;
2266 if (opcode == SpvOpCompositeExtract) {
2267 comp = vtn_value(b, w[4], vtn_value_type_constant);
2268 deref_start = 5;
2269 c = &comp->constant;
2270 } else {
2271 comp = vtn_value(b, w[5], vtn_value_type_constant);
2272 deref_start = 6;
2273 val->constant = nir_constant_clone(comp->constant,
2274 (nir_variable *)b);
2275 c = &val->constant;
2276 }
2277
2278 int elem = -1;
2279 const struct vtn_type *type = comp->type;
2280 for (unsigned i = deref_start; i < count; i++) {
2281 vtn_fail_if(w[i] > type->length,
2282 "%uth index of %s is %u but the type has only "
2283 "%u elements", i - deref_start,
2284 spirv_op_to_string(opcode), w[i], type->length);
2285
2286 switch (type->base_type) {
2287 case vtn_base_type_vector:
2288 elem = w[i];
2289 type = type->array_element;
2290 break;
2291
2292 case vtn_base_type_matrix:
2293 case vtn_base_type_array:
2294 c = &(*c)->elements[w[i]];
2295 type = type->array_element;
2296 break;
2297
2298 case vtn_base_type_struct:
2299 c = &(*c)->elements[w[i]];
2300 type = type->members[w[i]];
2301 break;
2302
2303 default:
2304 vtn_fail("%s must only index into composite types",
2305 spirv_op_to_string(opcode));
2306 }
2307 }
2308
2309 if (opcode == SpvOpCompositeExtract) {
2310 if (elem == -1) {
2311 val->constant = *c;
2312 } else {
2313 unsigned num_components = type->length;
2314 for (unsigned i = 0; i < num_components; i++)
2315 val->constant->values[i] = (*c)->values[elem + i];
2316 }
2317 } else {
2318 struct vtn_value *insert =
2319 vtn_value(b, w[4], vtn_value_type_constant);
2320 vtn_assert(insert->type == type);
2321 if (elem == -1) {
2322 *c = insert->constant;
2323 } else {
2324 unsigned num_components = type->length;
2325 for (unsigned i = 0; i < num_components; i++)
2326 (*c)->values[elem + i] = insert->constant->values[i];
2327 }
2328 }
2329 break;
2330 }
2331
2332 default: {
2333 bool swap;
2334 nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(val->type->type);
2335 nir_alu_type src_alu_type = dst_alu_type;
2336 unsigned num_components = glsl_get_vector_elements(val->type->type);
2337 unsigned bit_size;
2338
2339 vtn_assert(count <= 7);
2340
2341 switch (opcode) {
2342 case SpvOpSConvert:
2343 case SpvOpFConvert:
2344 case SpvOpUConvert:
2345 /* We have a source in a conversion */
2346 src_alu_type =
2347 nir_get_nir_type_for_glsl_type(vtn_get_value_type(b, w[4])->type);
2348 /* We use the bitsize of the conversion source to evaluate the opcode later */
2349 bit_size = glsl_get_bit_size(vtn_get_value_type(b, w[4])->type);
2350 break;
2351 default:
2352 bit_size = glsl_get_bit_size(val->type->type);
2353 };
2354
2355 bool exact;
2356 nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact,
2357 nir_alu_type_get_type_size(src_alu_type),
2358 nir_alu_type_get_type_size(dst_alu_type));
2359
2360 /* No SPIR-V opcodes handled through this path should set exact.
2361 * Since it is ignored, assert on it.
2362 */
2363 assert(!exact);
2364
2365 nir_const_value src[3][NIR_MAX_VEC_COMPONENTS];
2366
2367 for (unsigned i = 0; i < count - 4; i++) {
2368 struct vtn_value *src_val =
2369 vtn_value(b, w[4 + i], vtn_value_type_constant);
2370
2371 /* If this is an unsized source, pull the bit size from the
2372 * source; otherwise, we'll use the bit size from the destination.
2373 */
2374 if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]))
2375 bit_size = glsl_get_bit_size(src_val->type->type);
2376
2377 unsigned src_comps = nir_op_infos[op].input_sizes[i] ?
2378 nir_op_infos[op].input_sizes[i] :
2379 num_components;
2380
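            /* Some SPIR-V opcodes map to NIR opcodes that expect their
             * operands in the opposite order; when swap is reported by
             * vtn_nir_alu_op_for_spirv_opcode, exchange the two sources here.
             */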
2381 unsigned j = swap ? 1 - i : i;
2382 for (unsigned c = 0; c < src_comps; c++)
2383 src[j][c] = src_val->constant->values[c];
2384 }
2385
2386          /* Fix up sources with a fixed bit size: NIR shift counts are always 32-bit. */
2387 switch (op) {
2388 case nir_op_ishl:
2389 case nir_op_ishr:
2390 case nir_op_ushr: {
2391 if (bit_size == 32)
2392 break;
2393 for (unsigned i = 0; i < num_components; ++i) {
2394 switch (bit_size) {
2395 case 64: src[1][i].u32 = src[1][i].u64; break;
2396 case 16: src[1][i].u32 = src[1][i].u16; break;
2397 case 8: src[1][i].u32 = src[1][i].u8; break;
2398 }
2399 }
2400 break;
2401 }
2402 default:
2403 break;
2404 }
2405
2406 nir_const_value *srcs[3] = {
2407 src[0], src[1], src[2],
2408 };
2409 nir_eval_const_opcode(op, val->constant->values,
2410 num_components, bit_size, srcs,
2411 b->shader->info.float_controls_execution_mode);
2412 break;
2413 } /* default */
2414 }
2415 break;
2416 }
2417
2418 case SpvOpConstantNull:
2419 val->constant = vtn_null_constant(b, val->type);
2420 val->is_null_constant = true;
2421 break;
2422
2423 default:
2424 vtn_fail_with_opcode("Unhandled opcode", opcode);
2425 }
2426
2427 /* Now that we have the value, update the workgroup size if needed */
2428 if (gl_shader_stage_uses_workgroup(b->entry_point_stage))
2429 vtn_foreach_decoration(b, val, handle_workgroup_size_decoration_cb,
2430 NULL);
2431 }
2432
2433 static void
2434 vtn_split_barrier_semantics(struct vtn_builder *b,
2435 SpvMemorySemanticsMask semantics,
2436 SpvMemorySemanticsMask *before,
2437 SpvMemorySemanticsMask *after)
2438 {
2439 /* For memory semantics embedded in operations, we split them into up to
2440 * two barriers, to be added before and after the operation. This is less
2441 * strict than if we propagated until the final backend stage, but still
2442    * results in correct execution.
2443    *
2444    * A further improvement would be to pipe this information (and use it) into the
2445 * next compiler layers, at the expense of making the handling of barriers
2446 * more complicated.
2447 */
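   /* e.g. an operation carrying AcquireRelease | UniformMemory semantics ends
    * up with Release | UniformMemory in *before and Acquire | UniformMemory
    * in *after.
    */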
2448
2449 *before = SpvMemorySemanticsMaskNone;
2450 *after = SpvMemorySemanticsMaskNone;
2451
2452 SpvMemorySemanticsMask order_semantics =
2453 semantics & (SpvMemorySemanticsAcquireMask |
2454 SpvMemorySemanticsReleaseMask |
2455 SpvMemorySemanticsAcquireReleaseMask |
2456 SpvMemorySemanticsSequentiallyConsistentMask);
2457
2458 if (util_bitcount(order_semantics) > 1) {
2459 /* Old GLSLang versions incorrectly set all the ordering bits. This was
2460 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2461 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2462 */
2463 vtn_warn("Multiple memory ordering semantics specified, "
2464 "assuming AcquireRelease.");
2465 order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2466 }
2467
2468 const SpvMemorySemanticsMask av_vis_semantics =
2469 semantics & (SpvMemorySemanticsMakeAvailableMask |
2470 SpvMemorySemanticsMakeVisibleMask);
2471
2472 const SpvMemorySemanticsMask storage_semantics =
2473 semantics & (SpvMemorySemanticsUniformMemoryMask |
2474 SpvMemorySemanticsSubgroupMemoryMask |
2475 SpvMemorySemanticsWorkgroupMemoryMask |
2476 SpvMemorySemanticsCrossWorkgroupMemoryMask |
2477 SpvMemorySemanticsAtomicCounterMemoryMask |
2478 SpvMemorySemanticsImageMemoryMask |
2479 SpvMemorySemanticsOutputMemoryMask);
2480
2481 const SpvMemorySemanticsMask other_semantics =
2482 semantics & ~(order_semantics | av_vis_semantics | storage_semantics |
2483 SpvMemorySemanticsVolatileMask);
2484
2485 if (other_semantics)
2486 vtn_warn("Ignoring unhandled memory semantics: %u\n", other_semantics);
2487
2488 /* SequentiallyConsistent is treated as AcquireRelease. */
2489
2490 /* The RELEASE barrier happens BEFORE the operation, and it is usually
2491    * associated with a Store. All the write operations with matching
2492 * semantics will not be reordered after the Store.
2493 */
2494 if (order_semantics & (SpvMemorySemanticsReleaseMask |
2495 SpvMemorySemanticsAcquireReleaseMask |
2496 SpvMemorySemanticsSequentiallyConsistentMask)) {
2497 *before |= SpvMemorySemanticsReleaseMask | storage_semantics;
2498 }
2499
2500 /* The ACQUIRE barrier happens AFTER the operation, and it is usually
2501    * associated with a Load. All the operations with matching semantics
2502 * will not be reordered before the Load.
2503 */
2504 if (order_semantics & (SpvMemorySemanticsAcquireMask |
2505 SpvMemorySemanticsAcquireReleaseMask |
2506 SpvMemorySemanticsSequentiallyConsistentMask)) {
2507 *after |= SpvMemorySemanticsAcquireMask | storage_semantics;
2508 }
2509
2510 if (av_vis_semantics & SpvMemorySemanticsMakeVisibleMask)
2511 *before |= SpvMemorySemanticsMakeVisibleMask | storage_semantics;
2512
2513 if (av_vis_semantics & SpvMemorySemanticsMakeAvailableMask)
2514 *after |= SpvMemorySemanticsMakeAvailableMask | storage_semantics;
2515 }
2516
2517 static nir_memory_semantics
2518 vtn_mem_semantics_to_nir_mem_semantics(struct vtn_builder *b,
2519 SpvMemorySemanticsMask semantics)
2520 {
2521 nir_memory_semantics nir_semantics = 0;
2522
2523 SpvMemorySemanticsMask order_semantics =
2524 semantics & (SpvMemorySemanticsAcquireMask |
2525 SpvMemorySemanticsReleaseMask |
2526 SpvMemorySemanticsAcquireReleaseMask |
2527 SpvMemorySemanticsSequentiallyConsistentMask);
2528
2529 if (util_bitcount(order_semantics) > 1) {
2530 /* Old GLSLang versions incorrectly set all the ordering bits. This was
2531 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2532 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2533 */
2534 vtn_warn("Multiple memory ordering semantics bits specified, "
2535 "assuming AcquireRelease.");
2536 order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2537 }
2538
2539 switch (order_semantics) {
2540 case 0:
2541 /* Not an ordering barrier. */
2542 break;
2543
2544 case SpvMemorySemanticsAcquireMask:
2545 nir_semantics = NIR_MEMORY_ACQUIRE;
2546 break;
2547
2548 case SpvMemorySemanticsReleaseMask:
2549 nir_semantics = NIR_MEMORY_RELEASE;
2550 break;
2551
2552 case SpvMemorySemanticsSequentiallyConsistentMask:
2553 FALLTHROUGH; /* Treated as AcquireRelease in Vulkan. */
2554 case SpvMemorySemanticsAcquireReleaseMask:
2555 nir_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE;
2556 break;
2557
2558 default:
2559 unreachable("Invalid memory order semantics");
2560 }
2561
2562 if (semantics & SpvMemorySemanticsMakeAvailableMask) {
2563 vtn_fail_if(!b->options->caps.vk_memory_model,
2564 "To use MakeAvailable memory semantics the VulkanMemoryModel "
2565 "capability must be declared.");
2566 nir_semantics |= NIR_MEMORY_MAKE_AVAILABLE;
2567 }
2568
2569 if (semantics & SpvMemorySemanticsMakeVisibleMask) {
2570 vtn_fail_if(!b->options->caps.vk_memory_model,
2571 "To use MakeVisible memory semantics the VulkanMemoryModel "
2572 "capability must be declared.");
2573 nir_semantics |= NIR_MEMORY_MAKE_VISIBLE;
2574 }
2575
2576 return nir_semantics;
2577 }
2578
2579 static nir_variable_mode
2580 vtn_mem_semantics_to_nir_var_modes(struct vtn_builder *b,
2581 SpvMemorySemanticsMask semantics)
2582 {
2583 /* Vulkan Environment for SPIR-V says "SubgroupMemory, CrossWorkgroupMemory,
2584 * and AtomicCounterMemory are ignored".
2585 */
2586 if (b->options->environment == NIR_SPIRV_VULKAN) {
2587 semantics &= ~(SpvMemorySemanticsSubgroupMemoryMask |
2588 SpvMemorySemanticsCrossWorkgroupMemoryMask |
2589 SpvMemorySemanticsAtomicCounterMemoryMask);
2590 }
2591
2592 nir_variable_mode modes = 0;
2593 if (semantics & SpvMemorySemanticsUniformMemoryMask)
2594 modes |= nir_var_mem_ssbo | nir_var_mem_global;
2595 if (semantics & SpvMemorySemanticsImageMemoryMask)
2596 modes |= nir_var_image;
2597 if (semantics & SpvMemorySemanticsWorkgroupMemoryMask)
2598 modes |= nir_var_mem_shared;
2599 if (semantics & SpvMemorySemanticsCrossWorkgroupMemoryMask)
2600 modes |= nir_var_mem_global;
2601 if (semantics & SpvMemorySemanticsOutputMemoryMask) {
2602 modes |= nir_var_shader_out;
2603
2604 if (b->shader->info.stage == MESA_SHADER_TASK)
2605 modes |= nir_var_mem_task_payload;
2606 }
2607
2608 if (semantics & SpvMemorySemanticsAtomicCounterMemoryMask) {
2609 /* There's no nir_var_atomic_counter, but since atomic counters are
2610 * lowered to SSBOs, we use nir_var_mem_ssbo instead.
2611 */
2612 modes |= nir_var_mem_ssbo;
2613 }
2614
2615 return modes;
2616 }
2617
2618 mesa_scope
2619 vtn_translate_scope(struct vtn_builder *b, SpvScope scope)
2620 {
2621 switch (scope) {
2622 case SpvScopeDevice:
2623 vtn_fail_if(b->options->caps.vk_memory_model &&
2624 !b->options->caps.vk_memory_model_device_scope,
2625 "If the Vulkan memory model is declared and any instruction "
2626 "uses Device scope, the VulkanMemoryModelDeviceScope "
2627 "capability must be declared.");
2628 return SCOPE_DEVICE;
2629
2630 case SpvScopeQueueFamily:
2631 vtn_fail_if(!b->options->caps.vk_memory_model,
2632 "To use Queue Family scope, the VulkanMemoryModel capability "
2633 "must be declared.");
2634 return SCOPE_QUEUE_FAMILY;
2635
2636 case SpvScopeWorkgroup:
2637 return SCOPE_WORKGROUP;
2638
2639 case SpvScopeSubgroup:
2640 return SCOPE_SUBGROUP;
2641
2642 case SpvScopeInvocation:
2643 return SCOPE_INVOCATION;
2644
2645 case SpvScopeShaderCallKHR:
2646 return SCOPE_SHADER_CALL;
2647
2648 default:
2649 vtn_fail("Invalid memory scope");
2650 }
2651 }
2652
2653 static void
2654 vtn_emit_scoped_control_barrier(struct vtn_builder *b, SpvScope exec_scope,
2655 SpvScope mem_scope,
2656 SpvMemorySemanticsMask semantics)
2657 {
2658 nir_memory_semantics nir_semantics =
2659 vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2660 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2661 mesa_scope nir_exec_scope = vtn_translate_scope(b, exec_scope);
2662
2663 /* Memory semantics is optional for OpControlBarrier. */
2664 mesa_scope nir_mem_scope;
2665 if (nir_semantics == 0 || modes == 0)
2666 nir_mem_scope = SCOPE_NONE;
2667 else
2668 nir_mem_scope = vtn_translate_scope(b, mem_scope);
2669
2670 nir_barrier(&b->nb, .execution_scope=nir_exec_scope, .memory_scope=nir_mem_scope,
2671 .memory_semantics=nir_semantics, .memory_modes=modes);
2672 }
2673
2674 void
2675 vtn_emit_memory_barrier(struct vtn_builder *b, SpvScope scope,
2676 SpvMemorySemanticsMask semantics)
2677 {
2678 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2679 nir_memory_semantics nir_semantics =
2680 vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2681
2682 /* No barrier to add. */
2683 if (nir_semantics == 0 || modes == 0)
2684 return;
2685
2686 nir_barrier(&b->nb, .memory_scope=vtn_translate_scope(b, scope),
2687 .memory_semantics=nir_semantics,
2688 .memory_modes=modes);
2689 }
2690
2691 struct vtn_ssa_value *
2692 vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
2693 {
2694 /* Always use bare types for SSA values for a couple of reasons:
2695 *
2696 * 1. Code which emits deref chains should never listen to the explicit
2697 * layout information on the SSA value if any exists. If we've
2698 * accidentally been relying on this, we want to find those bugs.
2699 *
2700 * 2. We want to be able to quickly check that an SSA value being assigned
2701 * to a SPIR-V value has the right type. Using bare types everywhere
2702 * ensures that we can pointer-compare.
2703 */
2704 struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
2705 val->type = glsl_get_bare_type(type);
2706
2707
2708 if (!glsl_type_is_vector_or_scalar(type)) {
2709 unsigned elems = glsl_get_length(val->type);
2710 val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
2711 if (glsl_type_is_array_or_matrix(type) || glsl_type_is_cmat(type)) {
2712 const struct glsl_type *elem_type = glsl_get_array_element(type);
2713 for (unsigned i = 0; i < elems; i++)
2714 val->elems[i] = vtn_create_ssa_value(b, elem_type);
2715 } else {
2716 vtn_assert(glsl_type_is_struct_or_ifc(type));
2717 for (unsigned i = 0; i < elems; i++) {
2718 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
2719 val->elems[i] = vtn_create_ssa_value(b, elem_type);
2720 }
2721 }
2722 }
2723
2724 return val;
2725 }
2726
2727 void
2728 vtn_set_ssa_value_var(struct vtn_builder *b, struct vtn_ssa_value *ssa, nir_variable *var)
2729 {
2730 vtn_assert(glsl_type_is_cmat(var->type));
2731 vtn_assert(var->type == ssa->type);
2732 ssa->is_variable = true;
2733 ssa->var = var;
2734 }
2735
2736 static nir_tex_src
2737 vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
2738 {
2739 return nir_tex_src_for_ssa(type, vtn_get_nir_ssa(b, index));
2740 }
2741
2742 static uint32_t
2743 image_operand_arg(struct vtn_builder *b, const uint32_t *w, uint32_t count,
2744 uint32_t mask_idx, SpvImageOperandsMask op)
2745 {
2746 static const SpvImageOperandsMask ops_with_arg =
2747 SpvImageOperandsBiasMask |
2748 SpvImageOperandsLodMask |
2749 SpvImageOperandsGradMask |
2750 SpvImageOperandsConstOffsetMask |
2751 SpvImageOperandsOffsetMask |
2752 SpvImageOperandsConstOffsetsMask |
2753 SpvImageOperandsSampleMask |
2754 SpvImageOperandsMinLodMask |
2755 SpvImageOperandsMakeTexelAvailableMask |
2756 SpvImageOperandsMakeTexelVisibleMask;
2757
2758 assert(util_bitcount(op) == 1);
2759 assert(w[mask_idx] & op);
2760 assert(op & ops_with_arg);
2761
2762 uint32_t idx = util_bitcount(w[mask_idx] & (op - 1) & ops_with_arg) + 1;
2763
2764 /* Adjust indices for operands with two arguments. */
2765 static const SpvImageOperandsMask ops_with_two_args =
2766 SpvImageOperandsGradMask;
2767 idx += util_bitcount(w[mask_idx] & (op - 1) & ops_with_two_args);
2768
2769 idx += mask_idx;
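   /* e.g. with Bias | Grad | ConstOffset present and op == ConstOffset, idx
    * ends up at mask_idx + 4: the mask word, the Bias argument, the two Grad
    * arguments (ddx, ddy), then the ConstOffset argument itself.
    */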
2770
2771 vtn_fail_if(idx + (op & ops_with_two_args ? 1 : 0) >= count,
2772               "Image op claims to have %s but does not have enough "
2773 "following operands", spirv_imageoperands_to_string(op));
2774
2775 return idx;
2776 }
2777
2778 static void
2779 non_uniform_decoration_cb(struct vtn_builder *b,
2780 struct vtn_value *val, int member,
2781 const struct vtn_decoration *dec, void *void_ctx)
2782 {
2783 enum gl_access_qualifier *access = void_ctx;
2784 switch (dec->decoration) {
2785 case SpvDecorationNonUniformEXT:
2786 *access |= ACCESS_NON_UNIFORM;
2787 break;
2788
2789 default:
2790 break;
2791 }
2792 }
2793
2794 /* Apply SignExtend/ZeroExtend operands to get the actual result type for
2795 * image read/sample operations and source type for write operations.
2796 */
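/* A sketch of the effect: a read that would otherwise produce nir_type_uint32
 * instead yields nir_type_int32 when the SignExtend operand is present; the
 * bit size always comes from the original texel type.
 */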
2797 static nir_alu_type
2798 get_image_type(struct vtn_builder *b, nir_alu_type type, unsigned operands)
2799 {
2800 unsigned extend_operands =
2801 operands & (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask);
2802 vtn_fail_if(nir_alu_type_get_base_type(type) == nir_type_float && extend_operands,
2803 "SignExtend/ZeroExtend used on floating-point texel type");
2804 vtn_fail_if(extend_operands ==
2805 (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask),
2806 "SignExtend and ZeroExtend both specified");
2807
2808 if (operands & SpvImageOperandsSignExtendMask)
2809 return nir_type_int | nir_alu_type_get_type_size(type);
2810 if (operands & SpvImageOperandsZeroExtendMask)
2811 return nir_type_uint | nir_alu_type_get_type_size(type);
2812
2813 return type;
2814 }
2815
2816 static void
2817 vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
2818 const uint32_t *w, unsigned count)
2819 {
2820 if (opcode == SpvOpSampledImage) {
2821 struct vtn_sampled_image si = {
2822 .image = vtn_get_image(b, w[3], NULL),
2823 .sampler = vtn_get_sampler(b, w[4]),
2824 };
2825
2826 validate_image_type_for_sampled_image(
2827 b, si.image->type,
2828 "Type of Image operand of OpSampledImage");
2829
2830 enum gl_access_qualifier access = 0;
2831 vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
2832 non_uniform_decoration_cb, &access);
2833 vtn_foreach_decoration(b, vtn_untyped_value(b, w[4]),
2834 non_uniform_decoration_cb, &access);
2835
2836 vtn_push_sampled_image(b, w[2], si, access & ACCESS_NON_UNIFORM);
2837 return;
2838 } else if (opcode == SpvOpImage) {
2839 struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
2840
2841 enum gl_access_qualifier access = 0;
2842 vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
2843 non_uniform_decoration_cb, &access);
2844
2845 vtn_push_image(b, w[2], si.image, access & ACCESS_NON_UNIFORM);
2846 return;
2847 } else if (opcode == SpvOpImageSparseTexelsResident) {
2848 nir_def *code = vtn_get_nir_ssa(b, w[3]);
2849 vtn_push_nir_ssa(b, w[2], nir_is_sparse_texels_resident(&b->nb, 1, code));
2850 return;
2851 }
2852
2853 nir_deref_instr *image = NULL, *sampler = NULL;
2854 struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
2855 if (sampled_val->type->base_type == vtn_base_type_sampled_image) {
2856 struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
2857 image = si.image;
2858 sampler = si.sampler;
2859 } else {
2860 image = vtn_get_image(b, w[3], NULL);
2861 }
2862
2863 const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image->type);
2864 const bool is_array = glsl_sampler_type_is_array(image->type);
2865 nir_alu_type dest_type = nir_type_invalid;
2866
2867 /* Figure out the base texture operation */
2868 nir_texop texop;
2869 switch (opcode) {
2870 case SpvOpImageSampleImplicitLod:
2871 case SpvOpImageSparseSampleImplicitLod:
2872 case SpvOpImageSampleDrefImplicitLod:
2873 case SpvOpImageSparseSampleDrefImplicitLod:
2874 vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_BUF &&
2875 sampler_dim != GLSL_SAMPLER_DIM_MS &&
2876 sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS);
2877 texop = nir_texop_tex;
2878 break;
2879
2880 case SpvOpImageSampleProjImplicitLod:
2881 case SpvOpImageSampleProjDrefImplicitLod:
2882 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2883 sampler_dim == GLSL_SAMPLER_DIM_2D ||
2884 sampler_dim == GLSL_SAMPLER_DIM_3D ||
2885 sampler_dim == GLSL_SAMPLER_DIM_RECT);
2886 vtn_assert(!is_array);
2887 texop = nir_texop_tex;
2888 break;
2889
2890 case SpvOpImageSampleExplicitLod:
2891 case SpvOpImageSparseSampleExplicitLod:
2892 case SpvOpImageSampleDrefExplicitLod:
2893 case SpvOpImageSparseSampleDrefExplicitLod:
2894 vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_BUF &&
2895 sampler_dim != GLSL_SAMPLER_DIM_MS &&
2896 sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS);
2897 texop = nir_texop_txl;
2898 break;
2899
2900 case SpvOpImageSampleProjExplicitLod:
2901 case SpvOpImageSampleProjDrefExplicitLod:
2902 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2903 sampler_dim == GLSL_SAMPLER_DIM_2D ||
2904 sampler_dim == GLSL_SAMPLER_DIM_3D ||
2905 sampler_dim == GLSL_SAMPLER_DIM_RECT);
2906 vtn_assert(!is_array);
2907 texop = nir_texop_txl;
2908 break;
2909
2910 case SpvOpImageFetch:
2911 case SpvOpImageSparseFetch:
2912 vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_CUBE);
2913 if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
2914 texop = nir_texop_txf_ms;
2915 } else {
2916 texop = nir_texop_txf;
2917 }
2918 break;
2919
2920 case SpvOpImageGather:
2921 case SpvOpImageSparseGather:
2922 case SpvOpImageDrefGather:
2923 case SpvOpImageSparseDrefGather:
2924 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_2D ||
2925 sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
2926 sampler_dim == GLSL_SAMPLER_DIM_RECT);
2927 texop = nir_texop_tg4;
2928 break;
2929
2930 case SpvOpImageQuerySizeLod:
2931 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2932 sampler_dim == GLSL_SAMPLER_DIM_2D ||
2933 sampler_dim == GLSL_SAMPLER_DIM_3D ||
2934 sampler_dim == GLSL_SAMPLER_DIM_CUBE);
2935 texop = nir_texop_txs;
2936 dest_type = nir_type_int32;
2937 break;
2938
2939 case SpvOpImageQuerySize:
2940 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2941 sampler_dim == GLSL_SAMPLER_DIM_2D ||
2942 sampler_dim == GLSL_SAMPLER_DIM_3D ||
2943 sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
2944 sampler_dim == GLSL_SAMPLER_DIM_RECT ||
2945 sampler_dim == GLSL_SAMPLER_DIM_MS ||
2946 sampler_dim == GLSL_SAMPLER_DIM_BUF);
2947 texop = nir_texop_txs;
2948 dest_type = nir_type_int32;
2949 break;
2950
2951 case SpvOpImageQueryLod:
2952 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2953 sampler_dim == GLSL_SAMPLER_DIM_2D ||
2954 sampler_dim == GLSL_SAMPLER_DIM_3D ||
2955 sampler_dim == GLSL_SAMPLER_DIM_CUBE);
2956 texop = nir_texop_lod;
2957 dest_type = nir_type_float32;
2958 break;
2959
2960 case SpvOpImageQueryLevels:
2961       /* This operation is not valid for an MS image but is present in some old
2962 * shaders. Just return 1 in those cases.
2963 */
2964 if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
2965 vtn_warn("OpImageQueryLevels 'Sampled Image' should have an MS of 0, "
2966 "but found MS of 1. Replacing query with constant value 1.");
2967 vtn_push_nir_ssa(b, w[2], nir_imm_int(&b->nb, 1));
2968 return;
2969 }
2970 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2971 sampler_dim == GLSL_SAMPLER_DIM_2D ||
2972 sampler_dim == GLSL_SAMPLER_DIM_3D ||
2973 sampler_dim == GLSL_SAMPLER_DIM_CUBE);
2974 texop = nir_texop_query_levels;
2975 dest_type = nir_type_int32;
2976 break;
2977
2978 case SpvOpImageQuerySamples:
2979 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS);
2980 texop = nir_texop_texture_samples;
2981 dest_type = nir_type_int32;
2982 break;
2983
2984 case SpvOpFragmentFetchAMD:
2985 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS ||
2986 sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2987 texop = nir_texop_fragment_fetch_amd;
2988 break;
2989
2990 case SpvOpFragmentMaskFetchAMD:
2991 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS ||
2992 sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2993 texop = nir_texop_fragment_mask_fetch_amd;
2994 dest_type = nir_type_uint32;
2995 break;
2996
2997 default:
2998 vtn_fail_with_opcode("Unhandled opcode", opcode);
2999 }
3000
3001 nir_tex_src srcs[10]; /* 10 should be enough */
3002 nir_tex_src *p = srcs;
3003
3004 p->src = nir_src_for_ssa(&image->def);
3005 p->src_type = nir_tex_src_texture_deref;
3006 p++;
3007
3008 switch (texop) {
3009 case nir_texop_tex:
3010 case nir_texop_txb:
3011 case nir_texop_txl:
3012 case nir_texop_txd:
3013 case nir_texop_tg4:
3014 case nir_texop_lod:
3015 vtn_fail_if(sampler == NULL,
3016 "%s requires an image of type OpTypeSampledImage",
3017 spirv_op_to_string(opcode));
3018 p->src = nir_src_for_ssa(&sampler->def);
3019 p->src_type = nir_tex_src_sampler_deref;
3020 p++;
3021 break;
3022 case nir_texop_txf:
3023 case nir_texop_txf_ms:
3024 case nir_texop_txs:
3025 case nir_texop_query_levels:
3026 case nir_texop_texture_samples:
3027 case nir_texop_samples_identical:
3028 case nir_texop_fragment_fetch_amd:
3029 case nir_texop_fragment_mask_fetch_amd:
3030    /* These don't need a sampler */
3031 break;
3032 case nir_texop_txf_ms_fb:
3033 vtn_fail("unexpected nir_texop_txf_ms_fb");
3034 break;
3035 case nir_texop_txf_ms_mcs_intel:
3036 vtn_fail("unexpected nir_texop_txf_ms_mcs");
3037 break;
3038 case nir_texop_tex_prefetch:
3039 vtn_fail("unexpected nir_texop_tex_prefetch");
3040 break;
3041 case nir_texop_descriptor_amd:
3042 case nir_texop_sampler_descriptor_amd:
3043 vtn_fail("unexpected nir_texop_*descriptor_amd");
3044 break;
3045 case nir_texop_lod_bias_agx:
3046 vtn_fail("unexpected nir_texop_lod_bias_agx");
3047 break;
3048 case nir_texop_hdr_dim_nv:
3049 case nir_texop_tex_type_nv:
3050 vtn_fail("unexpected nir_texop_*_nv");
3051 break;
3052 }
3053
3054 unsigned idx = 4;
3055
3056 struct nir_def *coord;
3057 unsigned coord_components;
3058 switch (opcode) {
3059 case SpvOpImageSampleImplicitLod:
3060 case SpvOpImageSparseSampleImplicitLod:
3061 case SpvOpImageSampleExplicitLod:
3062 case SpvOpImageSparseSampleExplicitLod:
3063 case SpvOpImageSampleDrefImplicitLod:
3064 case SpvOpImageSparseSampleDrefImplicitLod:
3065 case SpvOpImageSampleDrefExplicitLod:
3066 case SpvOpImageSparseSampleDrefExplicitLod:
3067 case SpvOpImageSampleProjImplicitLod:
3068 case SpvOpImageSampleProjExplicitLod:
3069 case SpvOpImageSampleProjDrefImplicitLod:
3070 case SpvOpImageSampleProjDrefExplicitLod:
3071 case SpvOpImageFetch:
3072 case SpvOpImageSparseFetch:
3073 case SpvOpImageGather:
3074 case SpvOpImageSparseGather:
3075 case SpvOpImageDrefGather:
3076 case SpvOpImageSparseDrefGather:
3077 case SpvOpImageQueryLod:
3078 case SpvOpFragmentFetchAMD:
3079 case SpvOpFragmentMaskFetchAMD: {
3080 /* All these types have the coordinate as their first real argument */
3081 coord_components = glsl_get_sampler_dim_coordinate_components(sampler_dim);
3082
3083 if (is_array && texop != nir_texop_lod)
3084 coord_components++;
3085
3086 struct vtn_ssa_value *coord_val = vtn_ssa_value(b, w[idx++]);
3087 coord = coord_val->def;
3088      /* From the SPIR-V spec version 1.5, rev. 5:
3089 *
3090 * "Coordinate must be a scalar or vector of floating-point type. It
3091 * contains (u[, v] ... [, array layer]) as needed by the definition
3092 * of Sampled Image. It may be a vector larger than needed, but all
3093 * unused components appear after all used components."
3094 */
3095 vtn_fail_if(coord->num_components < coord_components,
3096 "Coordinate value passed has fewer components than sampler dimensionality.");
3097 p->src = nir_src_for_ssa(nir_trim_vector(&b->nb, coord, coord_components));
3098
3099 /* OpenCL allows integer sampling coordinates */
3100 if (glsl_type_is_integer(coord_val->type) &&
3101 opcode == SpvOpImageSampleExplicitLod) {
3102 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
3103                      "Unless the Kernel capability is being used, the coordinate parameter "
3104                      "of OpImageSampleExplicitLod must be floating point.");
3105
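         /* Integer coordinates address whole texels; converting to float and
          * adding 0.5 to each non-array component points at the texel center,
          * which is what the float sampling path expects.
          */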
3106 nir_def *coords[4];
3107 nir_def *f0_5 = nir_imm_float(&b->nb, 0.5);
3108 for (unsigned i = 0; i < coord_components; i++) {
3109 coords[i] = nir_i2f32(&b->nb, nir_channel(&b->nb, p->src.ssa, i));
3110
3111 if (!is_array || i != coord_components - 1)
3112 coords[i] = nir_fadd(&b->nb, coords[i], f0_5);
3113 }
3114
3115 p->src = nir_src_for_ssa(nir_vec(&b->nb, coords, coord_components));
3116 }
3117
3118 p->src_type = nir_tex_src_coord;
3119 p++;
3120 break;
3121 }
3122
3123 default:
3124 coord = NULL;
3125 coord_components = 0;
3126 break;
3127 }
3128
3129 switch (opcode) {
3130 case SpvOpImageSampleProjImplicitLod:
3131 case SpvOpImageSampleProjExplicitLod:
3132 case SpvOpImageSampleProjDrefImplicitLod:
3133 case SpvOpImageSampleProjDrefExplicitLod:
3134 /* These have the projector as the last coordinate component */
3135 p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
3136 p->src_type = nir_tex_src_projector;
3137 p++;
3138 break;
3139
3140 default:
3141 break;
3142 }
3143
3144 bool is_shadow = false;
3145 unsigned gather_component = 0;
3146 switch (opcode) {
3147 case SpvOpImageSampleDrefImplicitLod:
3148 case SpvOpImageSparseSampleDrefImplicitLod:
3149 case SpvOpImageSampleDrefExplicitLod:
3150 case SpvOpImageSparseSampleDrefExplicitLod:
3151 case SpvOpImageSampleProjDrefImplicitLod:
3152 case SpvOpImageSampleProjDrefExplicitLod:
3153 case SpvOpImageDrefGather:
3154 case SpvOpImageSparseDrefGather:
3155 /* These all have an explicit depth value as their next source */
3156 is_shadow = true;
3157 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
3158 break;
3159
3160 case SpvOpImageGather:
3161 case SpvOpImageSparseGather:
3162 /* This has a component as its next source */
3163 gather_component = vtn_constant_uint(b, w[idx++]);
3164 break;
3165
3166 default:
3167 break;
3168 }
3169
3170 bool is_sparse = false;
3171 switch (opcode) {
3172 case SpvOpImageSparseSampleImplicitLod:
3173 case SpvOpImageSparseSampleExplicitLod:
3174 case SpvOpImageSparseSampleDrefImplicitLod:
3175 case SpvOpImageSparseSampleDrefExplicitLod:
3176 case SpvOpImageSparseFetch:
3177 case SpvOpImageSparseGather:
3178 case SpvOpImageSparseDrefGather:
3179 is_sparse = true;
3180 break;
3181 default:
3182 break;
3183 }
3184
3185 /* For OpImageQuerySizeLod, we always have an LOD */
3186 if (opcode == SpvOpImageQuerySizeLod)
3187 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
3188
3189 /* For OpFragmentFetchAMD, we always have a multisample index */
3190 if (opcode == SpvOpFragmentFetchAMD)
3191 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
3192
3193 /* Now we need to handle some number of optional arguments */
3194 struct vtn_value *gather_offsets = NULL;
3195 uint32_t operands = SpvImageOperandsMaskNone;
3196 if (idx < count) {
3197 operands = w[idx];
3198
3199 if (operands & SpvImageOperandsBiasMask) {
3200 vtn_assert(texop == nir_texop_tex ||
3201 texop == nir_texop_tg4);
3202 if (texop == nir_texop_tex)
3203 texop = nir_texop_txb;
3204 uint32_t arg = image_operand_arg(b, w, count, idx,
3205 SpvImageOperandsBiasMask);
3206 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_bias);
3207 }
3208
3209 if (operands & SpvImageOperandsLodMask) {
3210 vtn_assert(texop == nir_texop_txl || texop == nir_texop_txf ||
3211 texop == nir_texop_txs || texop == nir_texop_tg4);
3212 uint32_t arg = image_operand_arg(b, w, count, idx,
3213 SpvImageOperandsLodMask);
3214 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_lod);
3215 }
3216
3217 if (operands & SpvImageOperandsGradMask) {
3218 vtn_assert(texop == nir_texop_txl);
3219 texop = nir_texop_txd;
3220 uint32_t arg = image_operand_arg(b, w, count, idx,
3221 SpvImageOperandsGradMask);
3222 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ddx);
3223 (*p++) = vtn_tex_src(b, w[arg + 1], nir_tex_src_ddy);
3224 }
3225
3226 vtn_fail_if(util_bitcount(operands & (SpvImageOperandsConstOffsetsMask |
3227 SpvImageOperandsOffsetMask |
3228 SpvImageOperandsConstOffsetMask)) > 1,
3229 "At most one of the ConstOffset, Offset, and ConstOffsets "
3230 "image operands can be used on a given instruction.");
3231
3232 if (operands & SpvImageOperandsOffsetMask) {
3233 uint32_t arg = image_operand_arg(b, w, count, idx,
3234 SpvImageOperandsOffsetMask);
3235 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3236 }
3237
3238 if (operands & SpvImageOperandsConstOffsetMask) {
3239 uint32_t arg = image_operand_arg(b, w, count, idx,
3240 SpvImageOperandsConstOffsetMask);
3241 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3242 }
3243
3244 if (operands & SpvImageOperandsConstOffsetsMask) {
3245 vtn_assert(texop == nir_texop_tg4);
3246 uint32_t arg = image_operand_arg(b, w, count, idx,
3247 SpvImageOperandsConstOffsetsMask);
3248 gather_offsets = vtn_value(b, w[arg], vtn_value_type_constant);
3249 }
3250
3251 if (operands & SpvImageOperandsSampleMask) {
3252 vtn_assert(texop == nir_texop_txf_ms);
3253 uint32_t arg = image_operand_arg(b, w, count, idx,
3254 SpvImageOperandsSampleMask);
3255 texop = nir_texop_txf_ms;
3256 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ms_index);
3257 }
3258
3259 if (operands & SpvImageOperandsMinLodMask) {
3260 vtn_assert(texop == nir_texop_tex ||
3261 texop == nir_texop_txb ||
3262 texop == nir_texop_txd);
3263 uint32_t arg = image_operand_arg(b, w, count, idx,
3264 SpvImageOperandsMinLodMask);
3265 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_min_lod);
3266 }
3267 }
3268
3269 struct vtn_type *ret_type = vtn_get_type(b, w[1]);
3270 struct vtn_type *struct_type = NULL;
3271 if (is_sparse) {
3272 vtn_assert(glsl_type_is_struct_or_ifc(ret_type->type));
3273 struct_type = ret_type;
3274 ret_type = struct_type->members[1];
3275 }
3276
3277 nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
3278 instr->op = texop;
3279
3280 memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
3281
3282 instr->coord_components = coord_components;
3283 instr->sampler_dim = sampler_dim;
3284 instr->is_array = is_array;
3285 instr->is_shadow = is_shadow;
3286 instr->is_sparse = is_sparse;
3287 instr->is_new_style_shadow =
3288 is_shadow && glsl_get_components(ret_type->type) == 1;
3289 instr->component = gather_component;
3290
3291 /* If SpvCapabilityImageGatherBiasLodAMD is enabled, texture gather without an explicit LOD
3292 * has an implicit one (instead of using level 0).
3293 */
3294 if (texop == nir_texop_tg4 && b->image_gather_bias_lod &&
3295 !(operands & SpvImageOperandsLodMask)) {
3296 instr->is_gather_implicit_lod = true;
3297 }
3298
3299 /* The Vulkan spec says:
3300 *
3301 * "If an instruction loads from or stores to a resource (including
3302 * atomics and image instructions) and the resource descriptor being
3303 * accessed is not dynamically uniform, then the operand corresponding
3304 * to that resource (e.g. the pointer or sampled image operand) must be
3305 * decorated with NonUniform."
3306 *
3307 * It's very careful to specify that the exact operand must be decorated
3308 * NonUniform. The SPIR-V parser is not expected to chase through long
3309 * chains to find the NonUniform decoration. It's either right there or we
3310 * can assume it doesn't exist.
3311 */
3312 enum gl_access_qualifier access = 0;
3313 vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access);
3314
3315 if (operands & SpvImageOperandsNontemporalMask)
3316 access |= ACCESS_NON_TEMPORAL;
3317
3318 if (sampler && b->options->force_tex_non_uniform)
3319 access |= ACCESS_NON_UNIFORM;
3320
3321 if (sampled_val->propagated_non_uniform)
3322 access |= ACCESS_NON_UNIFORM;
3323
3324 if (image && (access & ACCESS_NON_UNIFORM))
3325 instr->texture_non_uniform = true;
3326
3327 if (sampler && (access & ACCESS_NON_UNIFORM))
3328 instr->sampler_non_uniform = true;
3329
3330 /* for non-query ops, get dest_type from SPIR-V return type */
3331 if (dest_type == nir_type_invalid) {
3332 /* the return type should match the image type, unless the image type is
3333 * VOID (CL image), in which case the return type dictates the sampler
3334 */
3335 enum glsl_base_type sampler_base =
3336 glsl_get_sampler_result_type(image->type);
3337 enum glsl_base_type ret_base = glsl_get_base_type(ret_type->type);
3338 vtn_fail_if(sampler_base != ret_base && sampler_base != GLSL_TYPE_VOID,
3339 "SPIR-V return type mismatches image type. This is only valid "
3340 "for untyped images (OpenCL).");
3341 dest_type = nir_get_nir_type_for_glsl_base_type(ret_base);
3342 dest_type = get_image_type(b, dest_type, operands);
3343 }
3344
3345 instr->dest_type = dest_type;
3346
3347 nir_def_init(&instr->instr, &instr->def,
3348 nir_tex_instr_dest_size(instr), 32);
3349
3350 vtn_assert(glsl_get_vector_elements(ret_type->type) ==
3351 nir_tex_instr_result_size(instr));
3352
3353 if (gather_offsets) {
3354 vtn_fail_if(gather_offsets->type->base_type != vtn_base_type_array ||
3355 gather_offsets->type->length != 4,
3356 "ConstOffsets must be an array of size four of vectors "
3357 "of two integer components");
3358
3359 struct vtn_type *vec_type = gather_offsets->type->array_element;
3360 vtn_fail_if(vec_type->base_type != vtn_base_type_vector ||
3361 vec_type->length != 2 ||
3362 !glsl_type_is_integer(vec_type->type),
3363 "ConstOffsets must be an array of size four of vectors "
3364 "of two integer components");
3365
3366 unsigned bit_size = glsl_get_bit_size(vec_type->type);
3367 for (uint32_t i = 0; i < 4; i++) {
3368 const nir_const_value *cvec =
3369 gather_offsets->constant->elements[i]->values;
3370 for (uint32_t j = 0; j < 2; j++) {
3371 switch (bit_size) {
3372 case 8: instr->tg4_offsets[i][j] = cvec[j].i8; break;
3373 case 16: instr->tg4_offsets[i][j] = cvec[j].i16; break;
3374 case 32: instr->tg4_offsets[i][j] = cvec[j].i32; break;
3375 case 64: instr->tg4_offsets[i][j] = cvec[j].i64; break;
3376 default:
3377 vtn_fail("Unsupported bit size: %u", bit_size);
3378 }
3379 }
3380 }
3381 }
3382
3383 nir_builder_instr_insert(&b->nb, &instr->instr);
3384
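/* For sparse operations, the NIR destination packs the texel components first
 * and the residency code in one extra trailing channel.  Split that back into
 * the two-member SPIR-V result struct: member 0 gets the residency code,
 * member 1 gets the texel value.
 */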
3385 if (is_sparse) {
3386 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3387 unsigned result_size = glsl_get_vector_elements(ret_type->type);
3388 dest->elems[0]->def = nir_channel(&b->nb, &instr->def, result_size);
3389 dest->elems[1]->def = nir_trim_vector(&b->nb, &instr->def,
3390 result_size);
3391 vtn_push_ssa_value(b, w[2], dest);
3392 } else {
3393 vtn_push_nir_ssa(b, w[2], &instr->def);
3394 }
3395 }
3396
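/* Map a SPIR-V atomic opcode to the corresponding nir_atomic_op.  Several
 * SPIR-V opcodes share one NIR op: IIncrement, IDecrement and ISub all become
 * iadd (fill_common_atomic_sources() supplies the +1, -1 or negated operand)
 * and AtomicFlagTestAndSet is lowered to a compare-and-swap.
 */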
3397 static nir_atomic_op
3398 translate_atomic_op(SpvOp opcode)
3399 {
3400 switch (opcode) {
3401 case SpvOpAtomicExchange: return nir_atomic_op_xchg;
3402 case SpvOpAtomicCompareExchange: return nir_atomic_op_cmpxchg;
3403 case SpvOpAtomicCompareExchangeWeak: return nir_atomic_op_cmpxchg;
3404 case SpvOpAtomicIIncrement: return nir_atomic_op_iadd;
3405 case SpvOpAtomicIDecrement: return nir_atomic_op_iadd;
3406 case SpvOpAtomicIAdd: return nir_atomic_op_iadd;
3407 case SpvOpAtomicISub: return nir_atomic_op_iadd;
3408 case SpvOpAtomicSMin: return nir_atomic_op_imin;
3409 case SpvOpAtomicUMin: return nir_atomic_op_umin;
3410 case SpvOpAtomicSMax: return nir_atomic_op_imax;
3411 case SpvOpAtomicUMax: return nir_atomic_op_umax;
3412 case SpvOpAtomicAnd: return nir_atomic_op_iand;
3413 case SpvOpAtomicOr: return nir_atomic_op_ior;
3414 case SpvOpAtomicXor: return nir_atomic_op_ixor;
3415 case SpvOpAtomicFAddEXT: return nir_atomic_op_fadd;
3416 case SpvOpAtomicFMinEXT: return nir_atomic_op_fmin;
3417 case SpvOpAtomicFMaxEXT: return nir_atomic_op_fmax;
3418 case SpvOpAtomicFlagTestAndSet: return nir_atomic_op_cmpxchg;
3419 default:
3420 unreachable("Invalid atomic");
3421 }
3422 }
3423
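/* Fill in the data sources of an atomic, starting at *src.  The interesting
 * cases are increment/decrement/subtract, which synthesize the immediate or
 * negated operand expected by the iadd-based NIR op, and compare-exchange,
 * where the SPIR-V Comparator (w[8]) is placed before the Value (w[7]) to
 * match the NIR source order.
 */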
3424 static void
3425 fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
3426 const uint32_t *w, nir_src *src)
3427 {
3428 const struct glsl_type *type = vtn_get_type(b, w[1])->type;
3429 unsigned bit_size = glsl_get_bit_size(type);
3430
3431 switch (opcode) {
3432 case SpvOpAtomicIIncrement:
3433 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 1, bit_size));
3434 break;
3435
3436 case SpvOpAtomicIDecrement:
3437 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, bit_size));
3438 break;
3439
3440 case SpvOpAtomicISub:
3441 src[0] =
3442 nir_src_for_ssa(nir_ineg(&b->nb, vtn_get_nir_ssa(b, w[6])));
3443 break;
3444
3445 case SpvOpAtomicCompareExchange:
3446 case SpvOpAtomicCompareExchangeWeak:
3447 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[8]));
3448 src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[7]));
3449 break;
3450
3451 case SpvOpAtomicExchange:
3452 case SpvOpAtomicIAdd:
3453 case SpvOpAtomicSMin:
3454 case SpvOpAtomicUMin:
3455 case SpvOpAtomicSMax:
3456 case SpvOpAtomicUMax:
3457 case SpvOpAtomicAnd:
3458 case SpvOpAtomicOr:
3459 case SpvOpAtomicXor:
3460 case SpvOpAtomicFAddEXT:
3461 case SpvOpAtomicFMinEXT:
3462 case SpvOpAtomicFMaxEXT:
3463 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6]));
3464 break;
3465
3466 default:
3467 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3468 }
3469 }
3470
3471 static nir_def *
3472 get_image_coord(struct vtn_builder *b, uint32_t value)
3473 {
3474 nir_def *coord = vtn_get_nir_ssa(b, value);
3475 /* The image_load_store intrinsics assume a 4-dim coordinate */
3476 return nir_pad_vec4(&b->nb, coord);
3477 }
3478
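/* Handle OpImageTexelPointer and the storage-image instructions: reads,
 * writes, atomics and the size/format/sample-count queries.  Everything
 * except OpImageTexelPointer is lowered to a nir_intrinsic_image_deref_*
 * intrinsic, bracketed by whatever memory barriers the memory semantics
 * require.
 */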
3479 static void
3480 vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
3481 const uint32_t *w, unsigned count)
3482 {
3483 /* Just get this one out of the way */
3484 if (opcode == SpvOpImageTexelPointer) {
3485 struct vtn_value *val =
3486 vtn_push_value(b, w[2], vtn_value_type_image_pointer);
3487 val->image = vtn_alloc(b, struct vtn_image_pointer);
3488
3489 val->image->image = vtn_nir_deref(b, w[3]);
3490 val->image->coord = get_image_coord(b, w[4]);
3491 val->image->sample = vtn_get_nir_ssa(b, w[5]);
3492 val->image->lod = nir_imm_int(&b->nb, 0);
3493 return;
3494 }
3495
3496 struct vtn_image_pointer image;
3497 SpvScope scope = SpvScopeInvocation;
3498 SpvMemorySemanticsMask semantics = 0;
3499 SpvImageOperandsMask operands = SpvImageOperandsMaskNone;
3500
3501 enum gl_access_qualifier access = 0;
3502
3503 struct vtn_value *res_val;
3504 switch (opcode) {
3505 case SpvOpAtomicExchange:
3506 case SpvOpAtomicCompareExchange:
3507 case SpvOpAtomicCompareExchangeWeak:
3508 case SpvOpAtomicIIncrement:
3509 case SpvOpAtomicIDecrement:
3510 case SpvOpAtomicIAdd:
3511 case SpvOpAtomicISub:
3512 case SpvOpAtomicLoad:
3513 case SpvOpAtomicSMin:
3514 case SpvOpAtomicUMin:
3515 case SpvOpAtomicSMax:
3516 case SpvOpAtomicUMax:
3517 case SpvOpAtomicAnd:
3518 case SpvOpAtomicOr:
3519 case SpvOpAtomicXor:
3520 case SpvOpAtomicFAddEXT:
3521 case SpvOpAtomicFMinEXT:
3522 case SpvOpAtomicFMaxEXT:
3523 res_val = vtn_value(b, w[3], vtn_value_type_image_pointer);
3524 image = *res_val->image;
3525 scope = vtn_constant_uint(b, w[4]);
3526 semantics = vtn_constant_uint(b, w[5]);
3527 access |= ACCESS_COHERENT;
3528 break;
3529
3530 case SpvOpAtomicStore:
3531 res_val = vtn_value(b, w[1], vtn_value_type_image_pointer);
3532 image = *res_val->image;
3533 scope = vtn_constant_uint(b, w[2]);
3534 semantics = vtn_constant_uint(b, w[3]);
3535 access |= ACCESS_COHERENT;
3536 break;
3537
3538 case SpvOpImageQuerySizeLod:
3539 res_val = vtn_untyped_value(b, w[3]);
3540 image.image = vtn_get_image(b, w[3], &access);
3541 image.coord = NULL;
3542 image.sample = NULL;
3543 image.lod = vtn_ssa_value(b, w[4])->def;
3544 break;
3545
3546 case SpvOpImageQuerySize:
3547 case SpvOpImageQuerySamples:
3548 res_val = vtn_untyped_value(b, w[3]);
3549 image.image = vtn_get_image(b, w[3], &access);
3550 image.coord = NULL;
3551 image.sample = NULL;
3552 image.lod = NULL;
3553 break;
3554
3555 case SpvOpImageQueryFormat:
3556 case SpvOpImageQueryOrder:
3557 res_val = vtn_untyped_value(b, w[3]);
3558 image.image = vtn_get_image(b, w[3], &access);
3559 image.coord = NULL;
3560 image.sample = NULL;
3561 image.lod = NULL;
3562 break;
3563
3564 case SpvOpImageRead:
3565 case SpvOpImageSparseRead: {
3566 res_val = vtn_untyped_value(b, w[3]);
3567 image.image = vtn_get_image(b, w[3], &access);
3568 image.coord = get_image_coord(b, w[4]);
3569
3570 operands = count > 5 ? w[5] : SpvImageOperandsMaskNone;
3571
3572 if (operands & SpvImageOperandsSampleMask) {
3573 uint32_t arg = image_operand_arg(b, w, count, 5,
3574 SpvImageOperandsSampleMask);
3575 image.sample = vtn_get_nir_ssa(b, w[arg]);
3576 } else {
3577 image.sample = nir_undef(&b->nb, 1, 32);
3578 }
3579
3580 if (operands & SpvImageOperandsMakeTexelVisibleMask) {
3581 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3582 "MakeTexelVisible requires NonPrivateTexel to also be set.");
3583 uint32_t arg = image_operand_arg(b, w, count, 5,
3584 SpvImageOperandsMakeTexelVisibleMask);
3585 semantics = SpvMemorySemanticsMakeVisibleMask;
3586 scope = vtn_constant_uint(b, w[arg]);
3587 }
3588
3589 if (operands & SpvImageOperandsLodMask) {
3590 uint32_t arg = image_operand_arg(b, w, count, 5,
3591 SpvImageOperandsLodMask);
3592 image.lod = vtn_get_nir_ssa(b, w[arg]);
3593 } else {
3594 image.lod = nir_imm_int(&b->nb, 0);
3595 }
3596
3597 if (operands & SpvImageOperandsVolatileTexelMask)
3598 access |= ACCESS_VOLATILE;
3599 if (operands & SpvImageOperandsNontemporalMask)
3600 access |= ACCESS_NON_TEMPORAL;
3601
3602 break;
3603 }
3604
3605 case SpvOpImageWrite: {
3606 res_val = vtn_untyped_value(b, w[1]);
3607 image.image = vtn_get_image(b, w[1], &access);
3608 image.coord = get_image_coord(b, w[2]);
3609
3610 /* texel = w[3] */
3611
3612 operands = count > 4 ? w[4] : SpvImageOperandsMaskNone;
3613
3614 if (operands & SpvImageOperandsSampleMask) {
3615 uint32_t arg = image_operand_arg(b, w, count, 4,
3616 SpvImageOperandsSampleMask);
3617 image.sample = vtn_get_nir_ssa(b, w[arg]);
3618 } else {
3619 image.sample = nir_undef(&b->nb, 1, 32);
3620 }
3621
3622 if (operands & SpvImageOperandsMakeTexelAvailableMask) {
3623 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3624 "MakeTexelAvailable requires NonPrivateTexel to also be set.");
3625 uint32_t arg = image_operand_arg(b, w, count, 4,
3626 SpvImageOperandsMakeTexelAvailableMask);
3627 semantics = SpvMemorySemanticsMakeAvailableMask;
3628 scope = vtn_constant_uint(b, w[arg]);
3629 }
3630
3631 if (operands & SpvImageOperandsLodMask) {
3632 uint32_t arg = image_operand_arg(b, w, count, 4,
3633 SpvImageOperandsLodMask);
3634 image.lod = vtn_get_nir_ssa(b, w[arg]);
3635 } else {
3636 image.lod = nir_imm_int(&b->nb, 0);
3637 }
3638
3639 if (operands & SpvImageOperandsVolatileTexelMask)
3640 access |= ACCESS_VOLATILE;
3641 if (operands & SpvImageOperandsNontemporalMask)
3642 access |= ACCESS_NON_TEMPORAL;
3643
3644 break;
3645 }
3646
3647 default:
3648 vtn_fail_with_opcode("Invalid image opcode", opcode);
3649 }
3650
3651 if (semantics & SpvMemorySemanticsVolatileMask)
3652 access |= ACCESS_VOLATILE;
3653
3654 nir_intrinsic_op op;
3655 switch (opcode) {
3656 #define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_deref_##N; break;
3657 OP(ImageQuerySize, size)
3658 OP(ImageQuerySizeLod, size)
3659 OP(ImageRead, load)
3660 OP(ImageSparseRead, sparse_load)
3661 OP(ImageWrite, store)
3662 OP(AtomicLoad, load)
3663 OP(AtomicStore, store)
3664 OP(AtomicExchange, atomic)
3665 OP(AtomicCompareExchange, atomic_swap)
3666 OP(AtomicCompareExchangeWeak, atomic_swap)
3667 OP(AtomicIIncrement, atomic)
3668 OP(AtomicIDecrement, atomic)
3669 OP(AtomicIAdd, atomic)
3670 OP(AtomicISub, atomic)
3671 OP(AtomicSMin, atomic)
3672 OP(AtomicUMin, atomic)
3673 OP(AtomicSMax, atomic)
3674 OP(AtomicUMax, atomic)
3675 OP(AtomicAnd, atomic)
3676 OP(AtomicOr, atomic)
3677 OP(AtomicXor, atomic)
3678 OP(AtomicFAddEXT, atomic)
3679 OP(AtomicFMinEXT, atomic)
3680 OP(AtomicFMaxEXT, atomic)
3681 OP(ImageQueryFormat, format)
3682 OP(ImageQueryOrder, order)
3683 OP(ImageQuerySamples, samples)
3684 #undef OP
3685 default:
3686 vtn_fail_with_opcode("Invalid image opcode", opcode);
3687 }
3688
3689 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
3690 if (nir_intrinsic_has_atomic_op(intrin))
3691 nir_intrinsic_set_atomic_op(intrin, translate_atomic_op(opcode));
3692
3693 intrin->src[0] = nir_src_for_ssa(&image.image->def);
3694 nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(image.image->type));
3695 nir_intrinsic_set_image_array(intrin,
3696 glsl_sampler_type_is_array(image.image->type));
3697
3698 switch (opcode) {
3699 case SpvOpImageQuerySamples:
3700 case SpvOpImageQuerySize:
3701 case SpvOpImageQuerySizeLod:
3702 case SpvOpImageQueryFormat:
3703 case SpvOpImageQueryOrder:
3704 break;
3705 default:
3706 /* The image coordinate is always 4 components but we may not have that
3707 * many. Swizzle to compensate.
3708 */
3709 intrin->src[1] = nir_src_for_ssa(nir_pad_vec4(&b->nb, image.coord));
3710 intrin->src[2] = nir_src_for_ssa(image.sample);
3711 break;
3712 }
3713
3714 /* The Vulkan spec says:
3715 *
3716 * "If an instruction loads from or stores to a resource (including
3717 * atomics and image instructions) and the resource descriptor being
3718 * accessed is not dynamically uniform, then the operand corresponding
3719 * to that resource (e.g. the pointer or sampled image operand) must be
3720 * decorated with NonUniform."
3721 *
3722 * It's very careful to specify that the exact operand must be decorated
3723 * NonUniform. The SPIR-V parser is not expected to chase through long
3724 * chains to find the NonUniform decoration. It's either right there or we
3725 * can assume it doesn't exist.
3726 */
3727 vtn_foreach_decoration(b, res_val, non_uniform_decoration_cb, &access);
3728 nir_intrinsic_set_access(intrin, access);
3729
3730 switch (opcode) {
3731 case SpvOpImageQuerySamples:
3732 case SpvOpImageQueryFormat:
3733 case SpvOpImageQueryOrder:
3734 /* No additional sources */
3735 break;
3736 case SpvOpImageQuerySize:
3737 intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0));
3738 break;
3739 case SpvOpImageQuerySizeLod:
3740 intrin->src[1] = nir_src_for_ssa(image.lod);
3741 break;
3742 case SpvOpAtomicLoad:
3743 case SpvOpImageRead:
3744 case SpvOpImageSparseRead:
3745 /* Only OpImageRead can support a lod parameter if
3746       * SPV_AMD_shader_image_load_store_lod is used, but the current NIR
3747 * intrinsics definition for atomics requires us to set it for
3748 * OpAtomicLoad.
3749 */
3750 intrin->src[3] = nir_src_for_ssa(image.lod);
3751 break;
3752 case SpvOpAtomicStore:
3753 case SpvOpImageWrite: {
3754 const uint32_t value_id = opcode == SpvOpAtomicStore ? w[4] : w[3];
3755 struct vtn_ssa_value *value = vtn_ssa_value(b, value_id);
3756 /* nir_intrinsic_image_deref_store always takes a vec4 value */
3757 assert(op == nir_intrinsic_image_deref_store);
3758 intrin->num_components = 4;
3759 intrin->src[3] = nir_src_for_ssa(nir_pad_vec4(&b->nb, value->def));
3760 /* Only OpImageWrite can support a lod parameter if
3761       * SPV_AMD_shader_image_load_store_lod is used, but the current NIR
3762 * intrinsics definition for atomics requires us to set it for
3763 * OpAtomicStore.
3764 */
3765 intrin->src[4] = nir_src_for_ssa(image.lod);
3766
3767 nir_alu_type src_type =
3768 get_image_type(b, nir_get_nir_type_for_glsl_type(value->type), operands);
3769 nir_intrinsic_set_src_type(intrin, src_type);
3770 break;
3771 }
3772
3773 case SpvOpAtomicCompareExchange:
3774 case SpvOpAtomicCompareExchangeWeak:
3775 case SpvOpAtomicIIncrement:
3776 case SpvOpAtomicIDecrement:
3777 case SpvOpAtomicExchange:
3778 case SpvOpAtomicIAdd:
3779 case SpvOpAtomicISub:
3780 case SpvOpAtomicSMin:
3781 case SpvOpAtomicUMin:
3782 case SpvOpAtomicSMax:
3783 case SpvOpAtomicUMax:
3784 case SpvOpAtomicAnd:
3785 case SpvOpAtomicOr:
3786 case SpvOpAtomicXor:
3787 case SpvOpAtomicFAddEXT:
3788 case SpvOpAtomicFMinEXT:
3789 case SpvOpAtomicFMaxEXT:
3790 fill_common_atomic_sources(b, opcode, w, &intrin->src[3]);
3791 break;
3792
3793 default:
3794 vtn_fail_with_opcode("Invalid image opcode", opcode);
3795 }
3796
3797 /* Image operations implicitly have the Image storage memory semantics. */
3798 semantics |= SpvMemorySemanticsImageMemoryMask;
3799
3800 SpvMemorySemanticsMask before_semantics;
3801 SpvMemorySemanticsMask after_semantics;
3802 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
3803
3804 if (before_semantics)
3805 vtn_emit_memory_barrier(b, scope, before_semantics);
3806
3807 if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) {
3808 struct vtn_type *type = vtn_get_type(b, w[1]);
3809 struct vtn_type *struct_type = NULL;
3810 if (opcode == SpvOpImageSparseRead) {
3811 vtn_assert(glsl_type_is_struct_or_ifc(type->type));
3812 struct_type = type;
3813 type = struct_type->members[1];
3814 }
3815
3816 unsigned dest_components = glsl_get_vector_elements(type->type);
3817 if (opcode == SpvOpImageSparseRead)
3818 dest_components++;
3819
3820 if (nir_intrinsic_infos[op].dest_components == 0)
3821 intrin->num_components = dest_components;
3822
3823 unsigned bit_size = glsl_get_bit_size(type->type);
3824 if (opcode == SpvOpImageQuerySize ||
3825 opcode == SpvOpImageQuerySizeLod)
3826 bit_size = MIN2(bit_size, 32);
3827
3828 nir_def_init(&intrin->instr, &intrin->def,
3829 nir_intrinsic_dest_components(intrin), bit_size);
3830
3831 nir_builder_instr_insert(&b->nb, &intrin->instr);
3832
3833 nir_def *result = nir_trim_vector(&b->nb, &intrin->def,
3834 dest_components);
3835
3836 if (opcode == SpvOpImageQuerySize ||
3837 opcode == SpvOpImageQuerySizeLod)
3838 result = nir_u2uN(&b->nb, result, glsl_get_bit_size(type->type));
3839
3840 if (opcode == SpvOpImageSparseRead) {
3841 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3842 unsigned res_type_size = glsl_get_vector_elements(type->type);
3843 dest->elems[0]->def = nir_channel(&b->nb, result, res_type_size);
3844 if (intrin->def.bit_size != 32)
3845 dest->elems[0]->def = nir_u2u32(&b->nb, dest->elems[0]->def);
3846 dest->elems[1]->def = nir_trim_vector(&b->nb, result, res_type_size);
3847 vtn_push_ssa_value(b, w[2], dest);
3848 } else {
3849 vtn_push_nir_ssa(b, w[2], result);
3850 }
3851
3852 if (opcode == SpvOpImageRead || opcode == SpvOpImageSparseRead ||
3853 opcode == SpvOpAtomicLoad) {
3854 nir_alu_type dest_type =
3855 get_image_type(b, nir_get_nir_type_for_glsl_type(type->type), operands);
3856 nir_intrinsic_set_dest_type(intrin, dest_type);
3857 }
3858 } else {
3859 nir_builder_instr_insert(&b->nb, &intrin->instr);
3860 }
3861
3862 if (after_semantics)
3863 vtn_emit_memory_barrier(b, scope, after_semantics);
3864 }
3865
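/* Map SPIR-V atomics on atomic-counter uniforms to the
 * nir_intrinsic_atomic_counter_* intrinsics used for GLSL-style atomic
 * counters.
 */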
3866 static nir_intrinsic_op
3867 get_uniform_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
3868 {
3869 switch (opcode) {
3870 #define OP(S, N) case SpvOp##S: return nir_intrinsic_atomic_counter_ ##N;
3871 OP(AtomicLoad, read_deref)
3872 OP(AtomicExchange, exchange)
3873 OP(AtomicCompareExchange, comp_swap)
3874 OP(AtomicCompareExchangeWeak, comp_swap)
3875 OP(AtomicIIncrement, inc_deref)
3876 OP(AtomicIDecrement, post_dec_deref)
3877 OP(AtomicIAdd, add_deref)
3878 OP(AtomicISub, add_deref)
3879 OP(AtomicUMin, min_deref)
3880 OP(AtomicUMax, max_deref)
3881 OP(AtomicAnd, and_deref)
3882 OP(AtomicOr, or_deref)
3883 OP(AtomicXor, xor_deref)
3884 #undef OP
3885 default:
3886 /* We left the following out: AtomicStore, AtomicSMin and
3887       * AtomicSMax. Right now there are no NIR intrinsics for them. At the
3888       * moment, atomic counter support is only needed for ARB_spirv, so we
3889       * only need to support GLSL atomic counters, which are uints and don't
3890 * allow direct storage.
3891 */
3892 vtn_fail("Invalid uniform atomic");
3893 }
3894 }
3895
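/* Map SPIR-V atomics on regular (deref-based) pointers to NIR deref
 * intrinsics.  The OpAtomicFlag* instructions have no direct NIR equivalent:
 * AtomicFlagClear becomes a plain store of 0 and AtomicFlagTestAndSet becomes
 * a compare-and-swap, with the flag emulated as a 32-bit integer (see
 * vtn_handle_atomics()).
 */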
3896 static nir_intrinsic_op
3897 get_deref_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
3898 {
3899 switch (opcode) {
3900 case SpvOpAtomicLoad: return nir_intrinsic_load_deref;
3901 case SpvOpAtomicFlagClear:
3902 case SpvOpAtomicStore: return nir_intrinsic_store_deref;
3903 #define OP(S, N) case SpvOp##S: return nir_intrinsic_deref_##N;
3904 OP(AtomicExchange, atomic)
3905 OP(AtomicCompareExchange, atomic_swap)
3906 OP(AtomicCompareExchangeWeak, atomic_swap)
3907 OP(AtomicIIncrement, atomic)
3908 OP(AtomicIDecrement, atomic)
3909 OP(AtomicIAdd, atomic)
3910 OP(AtomicISub, atomic)
3911 OP(AtomicSMin, atomic)
3912 OP(AtomicUMin, atomic)
3913 OP(AtomicSMax, atomic)
3914 OP(AtomicUMax, atomic)
3915 OP(AtomicAnd, atomic)
3916 OP(AtomicOr, atomic)
3917 OP(AtomicXor, atomic)
3918 OP(AtomicFAddEXT, atomic)
3919 OP(AtomicFMinEXT, atomic)
3920 OP(AtomicFMaxEXT, atomic)
3921 OP(AtomicFlagTestAndSet, atomic_swap)
3922 #undef OP
3923 default:
3924 vtn_fail_with_opcode("Invalid shared atomic", opcode);
3925 }
3926 }
3927
3928 /*
3929 * Handles shared atomics, ssbo atomics and atomic counters.
3930 */
3931 static void
3932 vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
3933 const uint32_t *w, UNUSED unsigned count)
3934 {
3935 struct vtn_pointer *ptr;
3936 nir_intrinsic_instr *atomic;
3937
3938 SpvScope scope = SpvScopeInvocation;
3939 SpvMemorySemanticsMask semantics = 0;
3940 enum gl_access_qualifier access = 0;
3941
3942 switch (opcode) {
3943 case SpvOpAtomicLoad:
3944 case SpvOpAtomicExchange:
3945 case SpvOpAtomicCompareExchange:
3946 case SpvOpAtomicCompareExchangeWeak:
3947 case SpvOpAtomicIIncrement:
3948 case SpvOpAtomicIDecrement:
3949 case SpvOpAtomicIAdd:
3950 case SpvOpAtomicISub:
3951 case SpvOpAtomicSMin:
3952 case SpvOpAtomicUMin:
3953 case SpvOpAtomicSMax:
3954 case SpvOpAtomicUMax:
3955 case SpvOpAtomicAnd:
3956 case SpvOpAtomicOr:
3957 case SpvOpAtomicXor:
3958 case SpvOpAtomicFAddEXT:
3959 case SpvOpAtomicFMinEXT:
3960 case SpvOpAtomicFMaxEXT:
3961 case SpvOpAtomicFlagTestAndSet:
3962 ptr = vtn_pointer(b, w[3]);
3963 scope = vtn_constant_uint(b, w[4]);
3964 semantics = vtn_constant_uint(b, w[5]);
3965 break;
3966 case SpvOpAtomicFlagClear:
3967 case SpvOpAtomicStore:
3968 ptr = vtn_pointer(b, w[1]);
3969 scope = vtn_constant_uint(b, w[2]);
3970 semantics = vtn_constant_uint(b, w[3]);
3971 break;
3972
3973 default:
3974 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3975 }
3976
3977 if (semantics & SpvMemorySemanticsVolatileMask)
3978 access |= ACCESS_VOLATILE;
3979
3980 /* uniform as "atomic counter uniform" */
3981 if (ptr->mode == vtn_variable_mode_atomic_counter) {
3982 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
3983 nir_intrinsic_op op = get_uniform_nir_atomic_op(b, opcode);
3984 atomic = nir_intrinsic_instr_create(b->nb.shader, op);
3985 atomic->src[0] = nir_src_for_ssa(&deref->def);
3986
3987 /* SSBO needs to initialize index/offset. In this case we don't need to,
3988 * as that info is already stored on the ptr->var->var nir_variable (see
3989 * vtn_create_variable)
3990 */
3991
3992 switch (opcode) {
3993 case SpvOpAtomicLoad:
3994 case SpvOpAtomicExchange:
3995 case SpvOpAtomicCompareExchange:
3996 case SpvOpAtomicCompareExchangeWeak:
3997 case SpvOpAtomicIIncrement:
3998 case SpvOpAtomicIDecrement:
3999 case SpvOpAtomicIAdd:
4000 case SpvOpAtomicISub:
4001 case SpvOpAtomicSMin:
4002 case SpvOpAtomicUMin:
4003 case SpvOpAtomicSMax:
4004 case SpvOpAtomicUMax:
4005 case SpvOpAtomicAnd:
4006 case SpvOpAtomicOr:
4007 case SpvOpAtomicXor:
4008 /* Nothing: we don't need to call fill_common_atomic_sources here, as
4009       * atomic counter uniforms don't have sources.
4010 */
4011 break;
4012
4013 default:
4014 unreachable("Invalid SPIR-V atomic");
4015
4016 }
4017 } else {
4018 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
4019 const struct glsl_type *deref_type = deref->type;
4020 nir_intrinsic_op op = get_deref_nir_atomic_op(b, opcode);
4021 atomic = nir_intrinsic_instr_create(b->nb.shader, op);
4022 atomic->src[0] = nir_src_for_ssa(&deref->def);
4023
4024 if (nir_intrinsic_has_atomic_op(atomic))
4025 nir_intrinsic_set_atomic_op(atomic, translate_atomic_op(opcode));
4026
4027 if (ptr->mode != vtn_variable_mode_workgroup)
4028 access |= ACCESS_COHERENT;
4029
4030 nir_intrinsic_set_access(atomic, access);
4031
4032 switch (opcode) {
4033 case SpvOpAtomicLoad:
4034 atomic->num_components = glsl_get_vector_elements(deref_type);
4035 break;
4036
4037 case SpvOpAtomicStore:
4038 atomic->num_components = glsl_get_vector_elements(deref_type);
4039 nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
4040 atomic->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4]));
4041 break;
4042
4043 case SpvOpAtomicFlagClear:
4044 atomic->num_components = 1;
4045 nir_intrinsic_set_write_mask(atomic, 1);
4046 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
4047 break;
4048 case SpvOpAtomicFlagTestAndSet:
4049 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
4050 atomic->src[2] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, 32));
4051 break;
4052 case SpvOpAtomicExchange:
4053 case SpvOpAtomicCompareExchange:
4054 case SpvOpAtomicCompareExchangeWeak:
4055 case SpvOpAtomicIIncrement:
4056 case SpvOpAtomicIDecrement:
4057 case SpvOpAtomicIAdd:
4058 case SpvOpAtomicISub:
4059 case SpvOpAtomicSMin:
4060 case SpvOpAtomicUMin:
4061 case SpvOpAtomicSMax:
4062 case SpvOpAtomicUMax:
4063 case SpvOpAtomicAnd:
4064 case SpvOpAtomicOr:
4065 case SpvOpAtomicXor:
4066 case SpvOpAtomicFAddEXT:
4067 case SpvOpAtomicFMinEXT:
4068 case SpvOpAtomicFMaxEXT:
4069 fill_common_atomic_sources(b, opcode, w, &atomic->src[1]);
4070 break;
4071
4072 default:
4073 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
4074 }
4075 }
4076
4077 /* Atomic ordering operations will implicitly apply to the atomic operation
4078 * storage class, so include that too.
4079 */
4080 semantics |= vtn_mode_to_memory_semantics(ptr->mode);
4081
4082 SpvMemorySemanticsMask before_semantics;
4083 SpvMemorySemanticsMask after_semantics;
4084 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
4085
4086 if (before_semantics)
4087 vtn_emit_memory_barrier(b, scope, before_semantics);
4088
4089 if (opcode != SpvOpAtomicStore && opcode != SpvOpAtomicFlagClear) {
4090 struct vtn_type *type = vtn_get_type(b, w[1]);
4091
4092 if (opcode == SpvOpAtomicFlagTestAndSet) {
4093 /* map atomic flag to a 32-bit atomic integer. */
4094 nir_def_init(&atomic->instr, &atomic->def, 1, 32);
4095 } else {
4096 nir_def_init(&atomic->instr, &atomic->def,
4097 glsl_get_vector_elements(type->type),
4098 glsl_get_bit_size(type->type));
4099
4100 vtn_push_nir_ssa(b, w[2], &atomic->def);
4101 }
4102 }
4103
4104 nir_builder_instr_insert(&b->nb, &atomic->instr);
4105
4106 if (opcode == SpvOpAtomicFlagTestAndSet) {
4107 vtn_push_nir_ssa(b, w[2], nir_i2b(&b->nb, &atomic->def));
4108 }
4109 if (after_semantics)
4110 vtn_emit_memory_barrier(b, scope, after_semantics);
4111 }
4112
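/* Create a bare nir_op_vecN ALU instruction with its destination initialized.
 * The caller fills in the sources and swizzles before inserting it.
 */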
4113 static nir_alu_instr *
4114 create_vec(struct vtn_builder *b, unsigned num_components, unsigned bit_size)
4115 {
4116 nir_op op = nir_op_vec(num_components);
4117 nir_alu_instr *vec = nir_alu_instr_create(b->shader, op);
4118 nir_def_init(&vec->instr, &vec->def, num_components, bit_size);
4119
4120 return vec;
4121 }
4122
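/* Transpose a matrix value, returning the cached transpose if one already
 * exists.  Column i of the result gathers channel i from every source column,
 * and the result keeps a back-pointer so transposing twice yields the
 * original value.
 */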
4123 struct vtn_ssa_value *
4124 vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
4125 {
4126 if (src->transposed)
4127 return src->transposed;
4128
4129 struct vtn_ssa_value *dest =
4130 vtn_create_ssa_value(b, glsl_transposed_type(src->type));
4131
4132 for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
4133 if (glsl_type_is_vector_or_scalar(src->type)) {
4134 dest->elems[i]->def = nir_channel(&b->nb, src->def, i);
4135 } else {
4136 unsigned cols = glsl_get_matrix_columns(src->type);
4137 nir_scalar srcs[NIR_MAX_MATRIX_COLUMNS];
4138 for (unsigned j = 0; j < cols; j++) {
4139 srcs[j] = nir_get_scalar(src->elems[j]->def, i);
4140 }
4141 dest->elems[i]->def = nir_vec_scalars(&b->nb, srcs, cols);
4142 }
4143 }
4144
4145 dest->transposed = src;
4146
4147 return dest;
4148 }
4149
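/* Implement OpVectorShuffle.  Component literals index into the concatenation
 * of src0 and src1, and the special literal 0xFFFFFFFF selects an undefined
 * value, which is materialized here as nir_undef.
 */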
4150 static nir_def *
4151 vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
4152 nir_def *src0, nir_def *src1,
4153 const uint32_t *indices)
4154 {
4155 nir_alu_instr *vec = create_vec(b, num_components, src0->bit_size);
4156
4157 for (unsigned i = 0; i < num_components; i++) {
4158 uint32_t index = indices[i];
4159 unsigned total_components = src0->num_components + src1->num_components;
4160 vtn_fail_if(index != 0xffffffff && index >= total_components,
4161 "OpVectorShuffle: All Component literals must either be "
4162 "FFFFFFFF or in [0, N - 1] (inclusive)");
4163
4164 if (index == 0xffffffff) {
4165 vec->src[i].src =
4166 nir_src_for_ssa(nir_undef(&b->nb, 1, src0->bit_size));
4167 } else if (index < src0->num_components) {
4168 vec->src[i].src = nir_src_for_ssa(src0);
4169 vec->src[i].swizzle[0] = index;
4170 } else {
4171 vec->src[i].src = nir_src_for_ssa(src1);
4172 vec->src[i].swizzle[0] = index - src0->num_components;
4173 }
4174 }
4175
4176 nir_builder_instr_insert(&b->nb, &vec->instr);
4177
4178 return &vec->def;
4179 }
4180
4181 /*
4182  * Concatenates a number of vectors/scalars together to produce a vector.
4183 */
4184 static nir_def *
4185 vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
4186 unsigned num_srcs, nir_def **srcs)
4187 {
4188 nir_alu_instr *vec = create_vec(b, num_components, srcs[0]->bit_size);
4189
4190 /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
4191 *
4192 * "When constructing a vector, there must be at least two Constituent
4193 * operands."
4194 */
4195 vtn_assert(num_srcs >= 2);
4196
4197 unsigned dest_idx = 0;
4198 for (unsigned i = 0; i < num_srcs; i++) {
4199 nir_def *src = srcs[i];
4200 vtn_assert(dest_idx + src->num_components <= num_components);
4201 for (unsigned j = 0; j < src->num_components; j++) {
4202 vec->src[dest_idx].src = nir_src_for_ssa(src);
4203 vec->src[dest_idx].swizzle[0] = j;
4204 dest_idx++;
4205 }
4206 }
4207
4208 /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
4209 *
4210 * "When constructing a vector, the total number of components in all
4211 * the operands must equal the number of components in Result Type."
4212 */
4213 vtn_assert(dest_idx == num_components);
4214
4215 nir_builder_instr_insert(&b->nb, &vec->instr);
4216
4217 return &vec->def;
4218 }
4219
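/* Make a structural copy of a vtn_ssa_value tree.  The nir_defs at the leaves
 * are shared; only the vtn_ssa_value wrappers are duplicated, so callers such
 * as vtn_composite_insert() can overwrite individual elements without
 * touching the source value.
 */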
4220 static struct vtn_ssa_value *
4221 vtn_composite_copy(struct vtn_builder *b, struct vtn_ssa_value *src)
4222 {
4223 assert(!src->is_variable);
4224
4225 struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
4226 dest->type = src->type;
4227
4228 if (glsl_type_is_vector_or_scalar(src->type)) {
4229 dest->def = src->def;
4230 } else {
4231 unsigned elems = glsl_get_length(src->type);
4232
4233 dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
4234 for (unsigned i = 0; i < elems; i++)
4235 dest->elems[i] = vtn_composite_copy(b, src->elems[i]);
4236 }
4237
4238 return dest;
4239 }
4240
4241 static struct vtn_ssa_value *
4242 vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
4243 struct vtn_ssa_value *insert, const uint32_t *indices,
4244 unsigned num_indices)
4245 {
4246 if (glsl_type_is_cmat(src->type))
4247 return vtn_cooperative_matrix_insert(b, src, insert, indices, num_indices);
4248
4249 struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
4250
4251 struct vtn_ssa_value *cur = dest;
4252 unsigned i;
4253 for (i = 0; i < num_indices - 1; i++) {
4254 /* If we got a vector here, that means the next index will be trying to
4255 * dereference a scalar.
4256 */
4257 vtn_fail_if(glsl_type_is_vector_or_scalar(cur->type),
4258 "OpCompositeInsert has too many indices.");
4259 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4260 "All indices in an OpCompositeInsert must be in-bounds");
4261 cur = cur->elems[indices[i]];
4262 }
4263
4264 if (glsl_type_is_vector_or_scalar(cur->type)) {
4265 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4266 "All indices in an OpCompositeInsert must be in-bounds");
4267
4268 /* According to the SPIR-V spec, OpCompositeInsert may work down to
4269 * the component granularity. In that case, the last index will be
4270 * the index to insert the scalar into the vector.
4271 */
4272
4273 cur->def = nir_vector_insert_imm(&b->nb, cur->def, insert->def, indices[i]);
4274 } else {
4275 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4276 "All indices in an OpCompositeInsert must be in-bounds");
4277 cur->elems[indices[i]] = insert;
4278 }
4279
4280 return dest;
4281 }
4282
4283 static struct vtn_ssa_value *
4284 vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
4285 const uint32_t *indices, unsigned num_indices)
4286 {
4287 if (glsl_type_is_cmat(src->type))
4288 return vtn_cooperative_matrix_extract(b, src, indices, num_indices);
4289
4290 struct vtn_ssa_value *cur = src;
4291 for (unsigned i = 0; i < num_indices; i++) {
4292 if (glsl_type_is_vector_or_scalar(cur->type)) {
4293 vtn_assert(i == num_indices - 1);
4294 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4295 "All indices in an OpCompositeExtract must be in-bounds");
4296
4297 /* According to the SPIR-V spec, OpCompositeExtract may work down to
4298 * the component granularity. The last index will be the index of the
4299 * vector to extract.
4300 */
4301
4302 const struct glsl_type *scalar_type =
4303 glsl_scalar_type(glsl_get_base_type(cur->type));
4304 struct vtn_ssa_value *ret = vtn_create_ssa_value(b, scalar_type);
4305 ret->def = nir_channel(&b->nb, cur->def, indices[i]);
4306 return ret;
4307 } else {
4308 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4309 "All indices in an OpCompositeExtract must be in-bounds");
4310 cur = cur->elems[indices[i]];
4311 }
4312 }
4313
4314 return cur;
4315 }
4316
4317 static void
4318 vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
4319 const uint32_t *w, unsigned count)
4320 {
4321 struct vtn_type *type = vtn_get_type(b, w[1]);
4322 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
4323
4324 switch (opcode) {
4325 case SpvOpVectorExtractDynamic:
4326 ssa->def = nir_vector_extract(&b->nb, vtn_get_nir_ssa(b, w[3]),
4327 vtn_get_nir_ssa(b, w[4]));
4328 break;
4329
4330 case SpvOpVectorInsertDynamic:
4331 ssa->def = nir_vector_insert(&b->nb, vtn_get_nir_ssa(b, w[3]),
4332 vtn_get_nir_ssa(b, w[4]),
4333 vtn_get_nir_ssa(b, w[5]));
4334 break;
4335
4336 case SpvOpVectorShuffle:
4337 ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type),
4338 vtn_get_nir_ssa(b, w[3]),
4339 vtn_get_nir_ssa(b, w[4]),
4340 w + 5);
4341 break;
4342
4343 case SpvOpCompositeConstruct: {
4344 unsigned elems = count - 3;
4345 assume(elems >= 1);
4346 if (type->base_type == vtn_base_type_cooperative_matrix) {
4347 vtn_assert(elems == 1);
4348 nir_deref_instr *mat = vtn_create_cmat_temporary(b, type->type, "cmat_construct");
4349 nir_cmat_construct(&b->nb, &mat->def, vtn_get_nir_ssa(b, w[3]));
4350 vtn_set_ssa_value_var(b, ssa, mat->var);
4351 } else if (glsl_type_is_vector_or_scalar(type->type)) {
4352 nir_def *srcs[NIR_MAX_VEC_COMPONENTS];
4353 for (unsigned i = 0; i < elems; i++) {
4354 srcs[i] = vtn_get_nir_ssa(b, w[3 + i]);
4355 vtn_assert(glsl_get_bit_size(type->type) == srcs[i]->bit_size);
4356 }
4357 ssa->def =
4358 vtn_vector_construct(b, glsl_get_vector_elements(type->type),
4359 elems, srcs);
4360 } else {
4361 ssa->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
4362 for (unsigned i = 0; i < elems; i++)
4363 ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
4364 }
4365 break;
4366 }
4367 case SpvOpCompositeExtract:
4368 ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
4369 w + 4, count - 4);
4370 break;
4371
4372 case SpvOpCompositeInsert:
4373 ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
4374 vtn_ssa_value(b, w[3]),
4375 w + 5, count - 5);
4376 break;
4377
4378 case SpvOpCopyLogical: {
4379 ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
4380 struct vtn_type *dst_type = vtn_get_value_type(b, w[2]);
4381 vtn_assert(vtn_types_compatible(b, type, dst_type));
4382 ssa->type = glsl_get_bare_type(dst_type->type);
4383 break;
4384 }
4385 case SpvOpCopyObject:
4386 case SpvOpExpectKHR:
4387 vtn_copy_value(b, w[3], w[2]);
4388 return;
4389
4390 default:
4391 vtn_fail_with_opcode("unknown composite operation", opcode);
4392 }
4393
4394 vtn_push_ssa_value(b, w[2], ssa);
4395 }
4396
4397 static void
4398 vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
4399 const uint32_t *w, UNUSED unsigned count)
4400 {
4401 switch (opcode) {
4402 case SpvOpEmitVertex:
4403 case SpvOpEmitStreamVertex:
4404 case SpvOpEndPrimitive:
4405 case SpvOpEndStreamPrimitive: {
4406 unsigned stream = 0;
4407 if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
4408 stream = vtn_constant_uint(b, w[1]);
4409
4410 switch (opcode) {
4411 case SpvOpEmitStreamVertex:
4412 case SpvOpEmitVertex:
4413 nir_emit_vertex(&b->nb, stream);
4414 break;
4415 case SpvOpEndPrimitive:
4416 case SpvOpEndStreamPrimitive:
4417 nir_end_primitive(&b->nb, stream);
4418 break;
4419 default:
4420 unreachable("Invalid opcode");
4421 }
4422 break;
4423 }
4424
4425 case SpvOpMemoryBarrier: {
4426 SpvScope scope = vtn_constant_uint(b, w[1]);
4427 SpvMemorySemanticsMask semantics = vtn_constant_uint(b, w[2]);
4428 vtn_emit_memory_barrier(b, scope, semantics);
4429 return;
4430 }
4431
4432 case SpvOpControlBarrier: {
4433 SpvScope execution_scope = vtn_constant_uint(b, w[1]);
4434 SpvScope memory_scope = vtn_constant_uint(b, w[2]);
4435 SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]);
4436
4437 /* GLSLang, prior to commit 8297936dd6eb3, emitted OpControlBarrier with
4438 * memory semantics of None for GLSL barrier().
4439       * And even earlier, prior to c3f1cdfa, it emitted OpControlBarrier with
4440       * Device instead of Workgroup as the execution scope.
4441 */
4442 if (b->wa_glslang_cs_barrier &&
4443 b->nb.shader->info.stage == MESA_SHADER_COMPUTE &&
4444 (execution_scope == SpvScopeWorkgroup ||
4445 execution_scope == SpvScopeDevice) &&
4446 memory_semantics == SpvMemorySemanticsMaskNone) {
4447 execution_scope = SpvScopeWorkgroup;
4448 memory_scope = SpvScopeWorkgroup;
4449 memory_semantics = SpvMemorySemanticsAcquireReleaseMask |
4450 SpvMemorySemanticsWorkgroupMemoryMask;
4451 }
4452
4453 /* From the SPIR-V spec:
4454 *
4455 * "When used with the TessellationControl execution model, it also
4456 * implicitly synchronizes the Output Storage Class: Writes to Output
4457 * variables performed by any invocation executed prior to a
4458 * OpControlBarrier will be visible to any other invocation after
4459 * return from that OpControlBarrier."
4460 *
4461 * The same applies to VK_NV_mesh_shader.
4462 */
4463 if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL ||
4464 b->nb.shader->info.stage == MESA_SHADER_TASK ||
4465 b->nb.shader->info.stage == MESA_SHADER_MESH) {
4466 memory_semantics &= ~(SpvMemorySemanticsAcquireMask |
4467 SpvMemorySemanticsReleaseMask |
4468 SpvMemorySemanticsAcquireReleaseMask |
4469 SpvMemorySemanticsSequentiallyConsistentMask);
4470 memory_semantics |= SpvMemorySemanticsAcquireReleaseMask |
4471 SpvMemorySemanticsOutputMemoryMask;
4472 }
4473
4474 vtn_emit_scoped_control_barrier(b, execution_scope, memory_scope,
4475 memory_semantics);
4476 break;
4477 }
4478
4479 default:
4480 unreachable("unknown barrier instruction");
4481 }
4482 }
4483
4484 static enum tess_primitive_mode
4485 tess_primitive_mode_from_spv_execution_mode(struct vtn_builder *b,
4486 SpvExecutionMode mode)
4487 {
4488 switch (mode) {
4489 case SpvExecutionModeTriangles:
4490 return TESS_PRIMITIVE_TRIANGLES;
4491 case SpvExecutionModeQuads:
4492 return TESS_PRIMITIVE_QUADS;
4493 case SpvExecutionModeIsolines:
4494 return TESS_PRIMITIVE_ISOLINES;
4495 default:
4496 vtn_fail("Invalid tess primitive type: %s (%u)",
4497 spirv_executionmode_to_string(mode), mode);
4498 }
4499 }
4500
4501 static enum mesa_prim
4502 primitive_from_spv_execution_mode(struct vtn_builder *b,
4503 SpvExecutionMode mode)
4504 {
4505 switch (mode) {
4506 case SpvExecutionModeInputPoints:
4507 case SpvExecutionModeOutputPoints:
4508 return MESA_PRIM_POINTS;
4509 case SpvExecutionModeInputLines:
4510 case SpvExecutionModeOutputLinesNV:
4511 return MESA_PRIM_LINES;
4512 case SpvExecutionModeInputLinesAdjacency:
4513 return MESA_PRIM_LINES_ADJACENCY;
4514 case SpvExecutionModeTriangles:
4515 case SpvExecutionModeOutputTrianglesNV:
4516 return MESA_PRIM_TRIANGLES;
4517 case SpvExecutionModeInputTrianglesAdjacency:
4518 return MESA_PRIM_TRIANGLES_ADJACENCY;
4519 case SpvExecutionModeQuads:
4520 return MESA_PRIM_QUADS;
4521 case SpvExecutionModeOutputLineStrip:
4522 return MESA_PRIM_LINE_STRIP;
4523 case SpvExecutionModeOutputTriangleStrip:
4524 return MESA_PRIM_TRIANGLE_STRIP;
4525 default:
4526 vtn_fail("Invalid primitive type: %s (%u)",
4527 spirv_executionmode_to_string(mode), mode);
4528 }
4529 }
4530
4531 static unsigned
4532 vertices_in_from_spv_execution_mode(struct vtn_builder *b,
4533 SpvExecutionMode mode)
4534 {
4535 switch (mode) {
4536 case SpvExecutionModeInputPoints:
4537 return 1;
4538 case SpvExecutionModeInputLines:
4539 return 2;
4540 case SpvExecutionModeInputLinesAdjacency:
4541 return 4;
4542 case SpvExecutionModeTriangles:
4543 return 3;
4544 case SpvExecutionModeInputTrianglesAdjacency:
4545 return 6;
4546 default:
4547 vtn_fail("Invalid GS input mode: %s (%u)",
4548 spirv_executionmode_to_string(mode), mode);
4549 }
4550 }
4551
4552 gl_shader_stage
4553 vtn_stage_for_execution_model(SpvExecutionModel model)
4554 {
4555 switch (model) {
4556 case SpvExecutionModelVertex:
4557 return MESA_SHADER_VERTEX;
4558 case SpvExecutionModelTessellationControl:
4559 return MESA_SHADER_TESS_CTRL;
4560 case SpvExecutionModelTessellationEvaluation:
4561 return MESA_SHADER_TESS_EVAL;
4562 case SpvExecutionModelGeometry:
4563 return MESA_SHADER_GEOMETRY;
4564 case SpvExecutionModelFragment:
4565 return MESA_SHADER_FRAGMENT;
4566 case SpvExecutionModelGLCompute:
4567 return MESA_SHADER_COMPUTE;
4568 case SpvExecutionModelKernel:
4569 return MESA_SHADER_KERNEL;
4570 case SpvExecutionModelRayGenerationKHR:
4571 return MESA_SHADER_RAYGEN;
4572 case SpvExecutionModelAnyHitKHR:
4573 return MESA_SHADER_ANY_HIT;
4574 case SpvExecutionModelClosestHitKHR:
4575 return MESA_SHADER_CLOSEST_HIT;
4576 case SpvExecutionModelMissKHR:
4577 return MESA_SHADER_MISS;
4578 case SpvExecutionModelIntersectionKHR:
4579 return MESA_SHADER_INTERSECTION;
4580 case SpvExecutionModelCallableKHR:
4581 return MESA_SHADER_CALLABLE;
4582 case SpvExecutionModelTaskNV:
4583 case SpvExecutionModelTaskEXT:
4584 return MESA_SHADER_TASK;
4585 case SpvExecutionModelMeshNV:
4586 case SpvExecutionModelMeshEXT:
4587 return MESA_SHADER_MESH;
4588 default:
4589 return MESA_SHADER_NONE;
4590 }
4591 }
4592
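/* Warn (rather than fail) when the module declares a capability for which the
 * corresponding b->options->caps flag is not set.
 */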
4593 #define spv_check_supported(name, cap) do { \
4594 if (!(b->options && b->options->caps.name)) \
4595 vtn_warn("Unsupported SPIR-V capability: %s (%u)", \
4596 spirv_capability_to_string(cap), cap); \
4597 } while(0)
4598
4599
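/* Record the OpEntryPoint that matches the requested entry-point name and
 * stage, and capture its interface ID list.  The IDs are sorted, presumably
 * so later lookups can use a binary search.
 */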
4600 void
4601 vtn_handle_entry_point(struct vtn_builder *b, const uint32_t *w,
4602 unsigned count)
4603 {
4604 struct vtn_value *entry_point = &b->values[w[2]];
4605 /* Let this be a name label regardless */
4606 unsigned name_words;
4607 entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
4608
4609 gl_shader_stage stage = vtn_stage_for_execution_model(w[1]);
4610 vtn_fail_if(stage == MESA_SHADER_NONE,
4611 "Unsupported execution model: %s (%u)",
4612 spirv_executionmodel_to_string(w[1]), w[1]);
4613 if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
4614 stage != b->entry_point_stage)
4615 return;
4616
4617 vtn_assert(b->entry_point == NULL);
4618 b->entry_point = entry_point;
4619
4620 /* Entry points enumerate which global variables are used. */
4621 size_t start = 3 + name_words;
4622 b->interface_ids_count = count - start;
4623 b->interface_ids = vtn_alloc_array(b, uint32_t, b->interface_ids_count);
4624 memcpy(b->interface_ids, &w[start], b->interface_ids_count * 4);
4625 qsort(b->interface_ids, b->interface_ids_count, 4, cmp_uint32_t);
4626 }
4627
4628 static bool
4629 vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
4630 const uint32_t *w, unsigned count)
4631 {
4632 switch (opcode) {
4633 case SpvOpString:
4634 case SpvOpSource:
4635 case SpvOpSourceExtension:
4636 case SpvOpSourceContinued:
4637 case SpvOpModuleProcessed:
4638 vtn_handle_debug_text(b, opcode, w, count);
4639 break;
4640
4641 case SpvOpExtension: {
4642 /* Implementing both NV_mesh_shader and EXT_mesh_shader
4643 * is difficult without knowing which we're dealing with.
4644 * TODO: remove this when we stop supporting NV_mesh_shader.
4645 */
4646 const char *ext_name = (const char *)&w[1];
4647 if (strcmp(ext_name, "SPV_NV_mesh_shader") == 0)
4648 b->shader->info.mesh.nv = true;
4649 break;
4650 }
4651
4652 case SpvOpCapability: {
4653 SpvCapability cap = w[1];
4654 switch (cap) {
4655 case SpvCapabilityMatrix:
4656 case SpvCapabilityShader:
4657 case SpvCapabilityGeometry:
4658 case SpvCapabilityGeometryPointSize:
4659 case SpvCapabilityUniformBufferArrayDynamicIndexing:
4660 case SpvCapabilitySampledImageArrayDynamicIndexing:
4661 case SpvCapabilityStorageBufferArrayDynamicIndexing:
4662 case SpvCapabilityStorageImageArrayDynamicIndexing:
4663 case SpvCapabilityImageRect:
4664 case SpvCapabilitySampledRect:
4665 case SpvCapabilitySampled1D:
4666 case SpvCapabilityImage1D:
4667 case SpvCapabilitySampledCubeArray:
4668 case SpvCapabilityImageCubeArray:
4669 case SpvCapabilitySampledBuffer:
4670 case SpvCapabilityImageBuffer:
4671 case SpvCapabilityImageQuery:
4672 case SpvCapabilityDerivativeControl:
4673 case SpvCapabilityInterpolationFunction:
4674 case SpvCapabilityMultiViewport:
4675 case SpvCapabilitySampleRateShading:
4676 case SpvCapabilityClipDistance:
4677 case SpvCapabilityCullDistance:
4678 case SpvCapabilityInputAttachment:
4679 case SpvCapabilityImageGatherExtended:
4680 case SpvCapabilityStorageImageExtendedFormats:
4681 case SpvCapabilityVector16:
4682 case SpvCapabilityDotProduct:
4683 case SpvCapabilityDotProductInputAll:
4684 case SpvCapabilityDotProductInput4x8Bit:
4685 case SpvCapabilityDotProductInput4x8BitPacked:
4686 case SpvCapabilityExpectAssumeKHR:
4687 break;
4688
4689 case SpvCapabilityLinkage:
4690 if (!b->options->create_library)
4691 vtn_warn("Unsupported SPIR-V capability: %s",
4692 spirv_capability_to_string(cap));
4693 spv_check_supported(linkage, cap);
4694 break;
4695
4696 case SpvCapabilitySparseResidency:
4697 spv_check_supported(sparse_residency, cap);
4698 break;
4699
4700 case SpvCapabilityMinLod:
4701 spv_check_supported(min_lod, cap);
4702 break;
4703
4704 case SpvCapabilityAtomicStorage:
4705 spv_check_supported(atomic_storage, cap);
4706 break;
4707
4708 case SpvCapabilityFloat64:
4709 spv_check_supported(float64, cap);
4710 break;
4711 case SpvCapabilityInt64:
4712 spv_check_supported(int64, cap);
4713 break;
4714 case SpvCapabilityInt16:
4715 spv_check_supported(int16, cap);
4716 break;
4717 case SpvCapabilityInt8:
4718 spv_check_supported(int8, cap);
4719 break;
4720
4721 case SpvCapabilityTransformFeedback:
4722 spv_check_supported(transform_feedback, cap);
4723 break;
4724
4725 case SpvCapabilityGeometryStreams:
4726 spv_check_supported(geometry_streams, cap);
4727 break;
4728
4729 case SpvCapabilityInt64Atomics:
4730 spv_check_supported(int64_atomics, cap);
4731 break;
4732
4733 case SpvCapabilityStorageImageMultisample:
4734 spv_check_supported(storage_image_ms, cap);
4735 break;
4736
4737 case SpvCapabilityAddresses:
4738 spv_check_supported(address, cap);
4739 break;
4740
4741 case SpvCapabilityKernel:
4742 case SpvCapabilityFloat16Buffer:
4743 spv_check_supported(kernel, cap);
4744 break;
4745
4746 case SpvCapabilityGenericPointer:
4747 spv_check_supported(generic_pointers, cap);
4748 break;
4749
4750 case SpvCapabilityImageBasic:
4751 spv_check_supported(kernel_image, cap);
4752 break;
4753
4754 case SpvCapabilityImageReadWrite:
4755 spv_check_supported(kernel_image_read_write, cap);
4756 break;
4757
4758 case SpvCapabilityLiteralSampler:
4759 spv_check_supported(literal_sampler, cap);
4760 break;
4761
4762 case SpvCapabilityImageMipmap:
4763 case SpvCapabilityPipes:
4764 case SpvCapabilityDeviceEnqueue:
4765 vtn_warn("Unsupported OpenCL-style SPIR-V capability: %s",
4766 spirv_capability_to_string(cap));
4767 break;
4768
4769 case SpvCapabilityImageMSArray:
4770 spv_check_supported(image_ms_array, cap);
4771 break;
4772
4773 case SpvCapabilityTessellation:
4774 case SpvCapabilityTessellationPointSize:
4775 spv_check_supported(tessellation, cap);
4776 break;
4777
4778 case SpvCapabilityDrawParameters:
4779 spv_check_supported(draw_parameters, cap);
4780 break;
4781
4782 case SpvCapabilityStorageImageReadWithoutFormat:
4783 spv_check_supported(image_read_without_format, cap);
4784 break;
4785
4786 case SpvCapabilityStorageImageWriteWithoutFormat:
4787 spv_check_supported(image_write_without_format, cap);
4788 break;
4789
4790 case SpvCapabilityDeviceGroup:
4791 spv_check_supported(device_group, cap);
4792 break;
4793
4794 case SpvCapabilityMultiView:
4795 spv_check_supported(multiview, cap);
4796 break;
4797
4798 case SpvCapabilityGroupNonUniform:
4799 spv_check_supported(subgroup_basic, cap);
4800 break;
4801
4802 case SpvCapabilitySubgroupVoteKHR:
4803 case SpvCapabilityGroupNonUniformVote:
4804 spv_check_supported(subgroup_vote, cap);
4805 break;
4806
4807 case SpvCapabilitySubgroupBallotKHR:
4808 case SpvCapabilityGroupNonUniformBallot:
4809 spv_check_supported(subgroup_ballot, cap);
4810 break;
4811
4812 case SpvCapabilityGroupNonUniformShuffle:
4813 case SpvCapabilityGroupNonUniformShuffleRelative:
4814 spv_check_supported(subgroup_shuffle, cap);
4815 break;
4816
4817 case SpvCapabilityGroupNonUniformQuad:
4818 spv_check_supported(subgroup_quad, cap);
4819 break;
4820
4821 case SpvCapabilityGroupNonUniformArithmetic:
4822 case SpvCapabilityGroupNonUniformClustered:
4823 spv_check_supported(subgroup_arithmetic, cap);
4824 break;
4825
4826 case SpvCapabilityGroups:
4827 spv_check_supported(groups, cap);
4828 break;
4829
4830 case SpvCapabilitySubgroupDispatch:
4831 spv_check_supported(subgroup_dispatch, cap);
4832 /* Missing :
4833 * - SpvOpGetKernelLocalSizeForSubgroupCount
4834 * - SpvOpGetKernelMaxNumSubgroups
4835 */
4836 vtn_warn("Capability not fully supported: %s",
4837 spirv_capability_to_string(cap));
4838 break;
4839
4840 case SpvCapabilityVariablePointersStorageBuffer:
4841 case SpvCapabilityVariablePointers:
4842 spv_check_supported(variable_pointers, cap);
4843 b->variable_pointers = true;
4844 break;
4845
4846 case SpvCapabilityStorageUniformBufferBlock16:
4847 case SpvCapabilityStorageUniform16:
4848 case SpvCapabilityStoragePushConstant16:
4849 case SpvCapabilityStorageInputOutput16:
4850 spv_check_supported(storage_16bit, cap);
4851 break;
4852
4853 case SpvCapabilityShaderLayer:
4854 case SpvCapabilityShaderViewportIndex:
4855 case SpvCapabilityShaderViewportIndexLayerEXT:
4856 spv_check_supported(shader_viewport_index_layer, cap);
4857 break;
4858
4859 case SpvCapabilityStorageBuffer8BitAccess:
4860 case SpvCapabilityUniformAndStorageBuffer8BitAccess:
4861 case SpvCapabilityStoragePushConstant8:
4862 spv_check_supported(storage_8bit, cap);
4863 break;
4864
4865 case SpvCapabilityShaderNonUniformEXT:
4866 spv_check_supported(descriptor_indexing, cap);
4867 break;
4868
4869 case SpvCapabilityInputAttachmentArrayDynamicIndexingEXT:
4870 case SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT:
4871 case SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT:
4872 spv_check_supported(descriptor_array_dynamic_indexing, cap);
4873 break;
4874
4875 case SpvCapabilityUniformBufferArrayNonUniformIndexingEXT:
4876 case SpvCapabilitySampledImageArrayNonUniformIndexingEXT:
4877 case SpvCapabilityStorageBufferArrayNonUniformIndexingEXT:
4878 case SpvCapabilityStorageImageArrayNonUniformIndexingEXT:
4879 case SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT:
4880 case SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT:
4881 case SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT:
4882 spv_check_supported(descriptor_array_non_uniform_indexing, cap);
4883 break;
4884
4885 case SpvCapabilityRuntimeDescriptorArrayEXT:
4886 spv_check_supported(runtime_descriptor_array, cap);
4887 break;
4888
4889 case SpvCapabilityStencilExportEXT:
4890 spv_check_supported(stencil_export, cap);
4891 break;
4892
4893 case SpvCapabilitySampleMaskPostDepthCoverage:
4894 spv_check_supported(post_depth_coverage, cap);
4895 break;
4896
4897 case SpvCapabilityDenormFlushToZero:
4898 case SpvCapabilityDenormPreserve:
4899 case SpvCapabilitySignedZeroInfNanPreserve:
4900 case SpvCapabilityRoundingModeRTE:
4901 case SpvCapabilityRoundingModeRTZ:
4902 spv_check_supported(float_controls, cap);
4903 break;
4904
4905 case SpvCapabilityPhysicalStorageBufferAddresses:
4906 spv_check_supported(physical_storage_buffer_address, cap);
4907 break;
4908
4909 case SpvCapabilityComputeDerivativeGroupQuadsNV:
4910 case SpvCapabilityComputeDerivativeGroupLinearNV:
4911 spv_check_supported(derivative_group, cap);
4912 break;
4913
4914 case SpvCapabilityFloat16:
4915 spv_check_supported(float16, cap);
4916 break;
4917
4918 case SpvCapabilityFragmentShaderSampleInterlockEXT:
4919 spv_check_supported(fragment_shader_sample_interlock, cap);
4920 break;
4921
4922 case SpvCapabilityFragmentShaderPixelInterlockEXT:
4923 spv_check_supported(fragment_shader_pixel_interlock, cap);
4924 break;
4925
4926 case SpvCapabilityShaderSMBuiltinsNV:
4927 spv_check_supported(shader_sm_builtins_nv, cap);
4928 break;
4929
4930 case SpvCapabilityDemoteToHelperInvocation:
4931 spv_check_supported(demote_to_helper_invocation, cap);
4932 b->uses_demote_to_helper_invocation = true;
4933 break;
4934
4935 case SpvCapabilityShaderClockKHR:
4936 spv_check_supported(shader_clock, cap);
4937 break;
4938
4939 case SpvCapabilityVulkanMemoryModel:
4940 spv_check_supported(vk_memory_model, cap);
4941 break;
4942
4943 case SpvCapabilityVulkanMemoryModelDeviceScope:
4944 spv_check_supported(vk_memory_model_device_scope, cap);
4945 break;
4946
4947 case SpvCapabilityImageReadWriteLodAMD:
4948 spv_check_supported(amd_image_read_write_lod, cap);
4949 break;
4950
4951 case SpvCapabilityIntegerFunctions2INTEL:
4952 spv_check_supported(integer_functions2, cap);
4953 break;
4954
4955 case SpvCapabilityFragmentMaskAMD:
4956 spv_check_supported(amd_fragment_mask, cap);
4957 break;
4958
4959 case SpvCapabilityImageGatherBiasLodAMD:
4960 spv_check_supported(amd_image_gather_bias_lod, cap);
4961 b->image_gather_bias_lod = true;
4962 break;
4963
4964 case SpvCapabilityAtomicFloat16AddEXT:
4965 spv_check_supported(float16_atomic_add, cap);
4966 break;
4967
4968 case SpvCapabilityAtomicFloat32AddEXT:
4969 spv_check_supported(float32_atomic_add, cap);
4970 break;
4971
4972 case SpvCapabilityAtomicFloat64AddEXT:
4973 spv_check_supported(float64_atomic_add, cap);
4974 break;
4975
4976 case SpvCapabilitySubgroupShuffleINTEL:
4977 spv_check_supported(intel_subgroup_shuffle, cap);
4978 break;
4979
4980 case SpvCapabilitySubgroupBufferBlockIOINTEL:
4981 spv_check_supported(intel_subgroup_buffer_block_io, cap);
4982 break;
4983
4984 case SpvCapabilityRayCullMaskKHR:
4985 spv_check_supported(ray_cull_mask, cap);
4986 break;
4987
4988 case SpvCapabilityRayTracingKHR:
4989 spv_check_supported(ray_tracing, cap);
4990 break;
4991
4992 case SpvCapabilityRayQueryKHR:
4993 spv_check_supported(ray_query, cap);
4994 break;
4995
4996 case SpvCapabilityRayTraversalPrimitiveCullingKHR:
4997 spv_check_supported(ray_traversal_primitive_culling, cap);
4998 break;
4999
5000 case SpvCapabilityInt64ImageEXT:
5001 spv_check_supported(image_atomic_int64, cap);
5002 break;
5003
5004 case SpvCapabilityFragmentShadingRateKHR:
5005 spv_check_supported(fragment_shading_rate, cap);
5006 break;
5007
5008 case SpvCapabilityWorkgroupMemoryExplicitLayoutKHR:
5009 spv_check_supported(workgroup_memory_explicit_layout, cap);
5010 break;
5011
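/* The 8-bit and 16-bit explicit-layout capabilities additionally require
 * the corresponding small-storage support, so both are checked below.
 */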
5012 case SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR:
5013 spv_check_supported(workgroup_memory_explicit_layout, cap);
5014 spv_check_supported(storage_8bit, cap);
5015 break;
5016
5017 case SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR:
5018 spv_check_supported(workgroup_memory_explicit_layout, cap);
5019 spv_check_supported(storage_16bit, cap);
5020 break;
5021
5022 case SpvCapabilityAtomicFloat16MinMaxEXT:
5023 spv_check_supported(float16_atomic_min_max, cap);
5024 break;
5025
5026 case SpvCapabilityAtomicFloat32MinMaxEXT:
5027 spv_check_supported(float32_atomic_min_max, cap);
5028 break;
5029
5030 case SpvCapabilityAtomicFloat64MinMaxEXT:
5031 spv_check_supported(float64_atomic_min_max, cap);
5032 break;
5033
5034 case SpvCapabilityMeshShadingEXT:
5035 spv_check_supported(mesh_shading, cap);
5036 break;
5037
5038 case SpvCapabilityMeshShadingNV:
5039 spv_check_supported(mesh_shading_nv, cap);
5040 break;
5041
5042 case SpvCapabilityPerViewAttributesNV:
5043 spv_check_supported(per_view_attributes_nv, cap);
5044 break;
5045
5046 case SpvCapabilityShaderViewportMaskNV:
5047 spv_check_supported(shader_viewport_mask_nv, cap);
5048 break;
5049
5050 case SpvCapabilityGroupNonUniformRotateKHR:
5051 spv_check_supported(subgroup_rotate, cap);
5052 break;
5053
5054 case SpvCapabilityFragmentFullyCoveredEXT:
5055 spv_check_supported(fragment_fully_covered, cap);
5056 break;
5057
5058 case SpvCapabilityFragmentDensityEXT:
5059 spv_check_supported(fragment_density, cap);
5060 break;
5061
5062 case SpvCapabilityRayTracingPositionFetchKHR:
5063 case SpvCapabilityRayQueryPositionFetchKHR:
5064 spv_check_supported(ray_tracing_position_fetch, cap);
5065 break;
5066
5067 case SpvCapabilityFragmentBarycentricKHR:
5068 spv_check_supported(fragment_barycentric, cap);
5069 break;
5070
5071 case SpvCapabilityShaderEnqueueAMDX:
5072 spv_check_supported(shader_enqueue, cap);
5073 break;
5074
5075 case SpvCapabilityCooperativeMatrixKHR:
5076 spv_check_supported(cooperative_matrix, cap);
5077 break;
5078
5079 case SpvCapabilityQuadControlKHR:
5080 spv_check_supported(quad_control, cap);
5081 break;
5082
5083 default:
5084 vtn_fail("Unhandled capability: %s (%u)",
5085 spirv_capability_to_string(cap), cap);
5086 }
5087 break;
5088 }
5089
5090 case SpvOpExtInstImport:
5091 vtn_handle_extension(b, opcode, w, count);
5092 break;
5093
5094 case SpvOpMemoryModel:
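/* OpMemoryModel carries two operands: w[1] is the addressing model and
 * w[2] is the memory model.
 */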
5095 switch (w[1]) {
5096 case SpvAddressingModelPhysical32:
5097 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
5098 "AddressingModelPhysical32 only supported for kernels");
5099 b->shader->info.cs.ptr_size = 32;
5100 b->physical_ptrs = true;
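/* With Physical32 addressing, every driver-provided address format
 * (global, shared, constant) must be a single 32-bit scalar component,
 * which is what the asserts below verify.
 */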
5101 assert(nir_address_format_bit_size(b->options->global_addr_format) == 32);
5102 assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
5103 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 32);
5104 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
5105 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 32);
5106 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
5107 break;
5108 case SpvAddressingModelPhysical64:
5109 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
5110 "AddressingModelPhysical64 only supported for kernels");
5111 b->shader->info.cs.ptr_size = 64;
5112 b->physical_ptrs = true;
5113 assert(nir_address_format_bit_size(b->options->global_addr_format) == 64);
5114 assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
5115 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 64);
5116 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
5117 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 64);
5118 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
5119 break;
5120 case SpvAddressingModelLogical:
5121 vtn_fail_if(b->shader->info.stage == MESA_SHADER_KERNEL,
5122 "AddressingModelLogical only supported for shaders");
5123 b->physical_ptrs = false;
5124 break;
5125 case SpvAddressingModelPhysicalStorageBuffer64:
5126 vtn_fail_if(!b->options ||
5127 !b->options->caps.physical_storage_buffer_address,
5128 "AddressingModelPhysicalStorageBuffer64 not supported");
5129 break;
5130 default:
5131 vtn_fail("Unknown addressing model: %s (%u)",
5132 spirv_addressingmodel_to_string(w[1]), w[1]);
5133 break;
5134 }
5135
5136 b->mem_model = w[2];
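/* The Simple, GLSL450 and OpenCL memory models need no extra check here;
 * the Vulkan memory model is gated on a driver capability.
 */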
5137 switch (w[2]) {
5138 case SpvMemoryModelSimple:
5139 case SpvMemoryModelGLSL450:
5140 case SpvMemoryModelOpenCL:
5141 break;
5142 case SpvMemoryModelVulkan:
5143 vtn_fail_if(!b->options->caps.vk_memory_model,
5144 "Vulkan memory model is unsupported by this driver");
5145 break;
5146 default:
5147 vtn_fail("Unsupported memory model: %s",
5148 spirv_memorymodel_to_string(w[2]));
5149 break;
5150 }
5151 break;
5152
5153 case SpvOpEntryPoint:
5154 vtn_handle_entry_point(b, w, count);
5155 break;
5156
5157 case SpvOpName:
5158 b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
5159 break;
5160
5161 case SpvOpMemberName:
5162 case SpvOpExecutionMode:
5163 case SpvOpExecutionModeId:
5164 case SpvOpDecorationGroup:
5165 case SpvOpDecorate:
5166 case SpvOpDecorateId:
5167 case SpvOpMemberDecorate:
5168 case SpvOpGroupDecorate:
5169 case SpvOpGroupMemberDecorate:
5170 case SpvOpDecorateString:
5171 case SpvOpMemberDecorateString:
5172 vtn_handle_decoration(b, opcode, w, count);
5173 break;
5174
5175 case SpvOpExtInst: {
5176 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5177 if (val->ext_handler == vtn_handle_non_semantic_instruction) {
5178 /* NonSemantic extended instructions are acceptable in the preamble. */
5179 vtn_handle_non_semantic_instruction(b, w[4], w, count);
5180 return true;
5181 } else {
5182 return false; /* End of preamble. */
5183 }
5184 }
5185
5186 default:
5187 return false; /* End of preamble */
5188 }
5189
5190 return true;
5191 }
5192
5193 void
5194 vtn_handle_debug_text(struct vtn_builder *b, SpvOp opcode,
5195 const uint32_t *w, unsigned count)
5196 {
5197 switch (opcode) {
5198 case SpvOpString:
5199 vtn_push_value(b, w[1], vtn_value_type_string)->str =
5200 vtn_string_literal(b, &w[2], count - 2, NULL);
5201 break;
5202
5203 case SpvOpSource: {
5204 const char *lang;
5205 switch (w[1]) {
5206 default:
5207 case SpvSourceLanguageUnknown: lang = "unknown"; break;
5208 case SpvSourceLanguageESSL: lang = "ESSL"; break;
5209 case SpvSourceLanguageGLSL: lang = "GLSL"; break;
5210 case SpvSourceLanguageOpenCL_C: lang = "OpenCL C"; break;
5211 case SpvSourceLanguageOpenCL_CPP: lang = "OpenCL C++"; break;
5212 case SpvSourceLanguageHLSL: lang = "HLSL"; break;
5213 }
5214
5215 uint32_t version = w[2];
5216
5217 const char *file =
5218 (count > 3) ? vtn_value(b, w[3], vtn_value_type_string)->str : "";
5219
5220 vtn_info("Parsing SPIR-V from %s %u source file %s", lang, version, file);
5221
5222 b->source_lang = w[1];
5223 break;
5224 }
5225
5226 case SpvOpSourceExtension:
5227 case SpvOpSourceContinued:
5228 case SpvOpModuleProcessed:
5229 /* Unhandled, but these are for debug so that's ok. */
5230 break;
5231
5232 default:
5233 unreachable("Unhandled opcode");
5234 }
5235 }
5236
5237 static void
5238 vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
5239 const struct vtn_decoration *mode, UNUSED void *data)
5240 {
5241 vtn_assert(b->entry_point == entry_point);
5242
5243 switch (mode->exec_mode) {
5244 case SpvExecutionModeOriginUpperLeft:
5245 case SpvExecutionModeOriginLowerLeft:
5246 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5247 b->shader->info.fs.origin_upper_left =
5248 (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
5249 break;
5250
5251 case SpvExecutionModeEarlyFragmentTests:
5252 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5253 b->shader->info.fs.early_fragment_tests = true;
5254 break;
5255
5256 case SpvExecutionModePostDepthCoverage:
5257 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5258 b->shader->info.fs.post_depth_coverage = true;
5259 break;
5260
5261 case SpvExecutionModeInvocations:
5262 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5263 b->shader->info.gs.invocations = MAX2(1, mode->operands[0]);
5264 break;
5265
5266 case SpvExecutionModeDepthReplacing:
5267 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5268 if (b->shader->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE)
5269 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
5270 break;
5271 case SpvExecutionModeDepthGreater:
5272 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5273 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
5274 break;
5275 case SpvExecutionModeDepthLess:
5276 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5277 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
5278 break;
5279 case SpvExecutionModeDepthUnchanged:
5280 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5281 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
5282 break;
5283
5284 case SpvExecutionModeLocalSizeHint:
5285 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5286 b->shader->info.cs.workgroup_size_hint[0] = mode->operands[0];
5287 b->shader->info.cs.workgroup_size_hint[1] = mode->operands[1];
5288 b->shader->info.cs.workgroup_size_hint[2] = mode->operands[2];
5289 break;
5290
5291 case SpvExecutionModeLocalSize:
5292 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5293 b->shader->info.workgroup_size[0] = mode->operands[0];
5294 b->shader->info.workgroup_size[1] = mode->operands[1];
5295 b->shader->info.workgroup_size[2] = mode->operands[2];
5296 } else {
5297 vtn_fail("Execution mode LocalSize not supported in stage %s",
5298 _mesa_shader_stage_to_string(b->shader->info.stage));
5299 }
5300 break;
5301
5302 case SpvExecutionModeOutputVertices:
5303 switch (b->shader->info.stage) {
5304 case MESA_SHADER_TESS_CTRL:
5305 case MESA_SHADER_TESS_EVAL:
5306 b->shader->info.tess.tcs_vertices_out = mode->operands[0];
5307 break;
5308 case MESA_SHADER_GEOMETRY:
5309 b->shader->info.gs.vertices_out = mode->operands[0];
5310 break;
5311 case MESA_SHADER_MESH:
5312 b->shader->info.mesh.max_vertices_out = mode->operands[0];
5313 break;
5314 default:
5315 vtn_fail("Execution mode OutputVertices not supported in stage %s",
5316 _mesa_shader_stage_to_string(b->shader->info.stage));
5317 break;
5318 }
5319 break;
5320
5321 case SpvExecutionModeInputPoints:
5322 case SpvExecutionModeInputLines:
5323 case SpvExecutionModeInputLinesAdjacency:
5324 case SpvExecutionModeTriangles:
5325 case SpvExecutionModeInputTrianglesAdjacency:
5326 case SpvExecutionModeQuads:
5327 case SpvExecutionModeIsolines:
5328 if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5329 b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
5330 b->shader->info.tess._primitive_mode =
5331 tess_primitive_mode_from_spv_execution_mode(b, mode->exec_mode);
5332 } else {
5333 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5334 b->shader->info.gs.vertices_in =
5335 vertices_in_from_spv_execution_mode(b, mode->exec_mode);
5336 b->shader->info.gs.input_primitive =
5337 primitive_from_spv_execution_mode(b, mode->exec_mode);
5338 }
5339 break;
5340
5341 case SpvExecutionModeOutputPrimitivesNV:
5342 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5343 b->shader->info.mesh.max_primitives_out = mode->operands[0];
5344 break;
5345
5346 case SpvExecutionModeOutputLinesNV:
5347 case SpvExecutionModeOutputTrianglesNV:
5348 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5349 b->shader->info.mesh.primitive_type =
5350 primitive_from_spv_execution_mode(b, mode->exec_mode);
5351 break;
5352
5353 case SpvExecutionModeOutputPoints: {
5354 const unsigned primitive =
5355 primitive_from_spv_execution_mode(b, mode->exec_mode);
5356
5357 switch (b->shader->info.stage) {
5358 case MESA_SHADER_GEOMETRY:
5359 b->shader->info.gs.output_primitive = primitive;
5360 break;
5361 case MESA_SHADER_MESH:
5362 b->shader->info.mesh.primitive_type = primitive;
5363 break;
5364 default:
5365 vtn_fail("Execution mode OutputPoints not supported in stage %s",
5366 _mesa_shader_stage_to_string(b->shader->info.stage));
5367 break;
5368 }
5369 break;
5370 }
5371
5372 case SpvExecutionModeOutputLineStrip:
5373 case SpvExecutionModeOutputTriangleStrip:
5374 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5375 b->shader->info.gs.output_primitive =
5376 primitive_from_spv_execution_mode(b, mode->exec_mode);
5377 break;
5378
5379 case SpvExecutionModeSpacingEqual:
5380 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5381 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5382 b->shader->info.tess.spacing = TESS_SPACING_EQUAL;
5383 break;
5384 case SpvExecutionModeSpacingFractionalEven:
5385 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5386 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5387 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_EVEN;
5388 break;
5389 case SpvExecutionModeSpacingFractionalOdd:
5390 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5391 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5392 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_ODD;
5393 break;
5394 case SpvExecutionModeVertexOrderCw:
5395 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5396 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5397 b->shader->info.tess.ccw = false;
5398 break;
5399 case SpvExecutionModeVertexOrderCcw:
5400 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5401 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5402 b->shader->info.tess.ccw = true;
5403 break;
5404 case SpvExecutionModePointMode:
5405 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5406 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5407 b->shader->info.tess.point_mode = true;
5408 break;
5409
5410 case SpvExecutionModePixelCenterInteger:
5411 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5412 b->shader->info.fs.pixel_center_integer = true;
5413 break;
5414
5415 case SpvExecutionModeXfb:
5416 b->shader->info.has_transform_feedback_varyings = true;
5417 break;
5418
5419 case SpvExecutionModeVecTypeHint:
5420 break; /* OpenCL */
5421
5422 case SpvExecutionModeContractionOff:
5423 if (b->shader->info.stage != MESA_SHADER_KERNEL)
5424 vtn_warn("ExecutionMode only allowed for CL-style kernels: %s",
5425 spirv_executionmode_to_string(mode->exec_mode));
5426 else
5427 b->exact = true;
5428 break;
5429
5430 case SpvExecutionModeStencilRefReplacingEXT:
5431 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5432 break;
5433
5434 case SpvExecutionModeDerivativeGroupQuadsNV:
5435 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5436 b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_QUADS;
5437 break;
5438
5439 case SpvExecutionModeDerivativeGroupLinearNV:
5440 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5441 b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_LINEAR;
5442 break;
5443
5444 case SpvExecutionModePixelInterlockOrderedEXT:
5445 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5446 b->shader->info.fs.pixel_interlock_ordered = true;
5447 break;
5448
5449 case SpvExecutionModePixelInterlockUnorderedEXT:
5450 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5451 b->shader->info.fs.pixel_interlock_unordered = true;
5452 break;
5453
5454 case SpvExecutionModeSampleInterlockOrderedEXT:
5455 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5456 b->shader->info.fs.sample_interlock_ordered = true;
5457 break;
5458
5459 case SpvExecutionModeSampleInterlockUnorderedEXT:
5460 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5461 b->shader->info.fs.sample_interlock_unordered = true;
5462 break;
5463
5464 case SpvExecutionModeDenormPreserve:
5465 case SpvExecutionModeDenormFlushToZero:
5466 case SpvExecutionModeSignedZeroInfNanPreserve:
5467 case SpvExecutionModeRoundingModeRTE:
5468 case SpvExecutionModeRoundingModeRTZ: {
5469 unsigned execution_mode = 0;
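/* Translate the execution mode plus its bit-width operand into the
 * matching FLOAT_CONTROLS_* flag.
 */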
5470 switch (mode->exec_mode) {
5471 case SpvExecutionModeDenormPreserve:
5472 switch (mode->operands[0]) {
5473 case 16: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP16; break;
5474 case 32: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP32; break;
5475 case 64: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP64; break;
5476 default: vtn_fail("Floating point type not supported");
5477 }
5478 break;
5479 case SpvExecutionModeDenormFlushToZero:
5480 switch (mode->operands[0]) {
5481 case 16: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16; break;
5482 case 32: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32; break;
5483 case 64: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64; break;
5484 default: vtn_fail("Floating point type not supported");
5485 }
5486 break;
5487 case SpvExecutionModeSignedZeroInfNanPreserve:
5488 switch (mode->operands[0]) {
5489 case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break;
5490 case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break;
5491 case 64: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break;
5492 default: vtn_fail("Floating point type not supported");
5493 }
5494 break;
5495 case SpvExecutionModeRoundingModeRTE:
5496 switch (mode->operands[0]) {
5497 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16; break;
5498 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32; break;
5499 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64; break;
5500 default: vtn_fail("Floating point type not supported");
5501 }
5502 break;
5503 case SpvExecutionModeRoundingModeRTZ:
5504 switch (mode->operands[0]) {
5505 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16; break;
5506 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32; break;
5507 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; break;
5508 default: vtn_fail("Floating point type not supported");
5509 }
5510 break;
5511 default:
5512 break;
5513 }
5514
5515 b->shader->info.float_controls_execution_mode |= execution_mode;
5516
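/* SPIR-V forbids contradictory float-control modes for the same bit size,
 * so reject flush-to-zero combined with denorm-preserve and RTE combined
 * with RTZ.
 */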
5517 for (unsigned bit_size = 16; bit_size <= 64; bit_size *= 2) {
5518 vtn_fail_if(nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, bit_size) &&
5519 nir_is_denorm_preserve(b->shader->info.float_controls_execution_mode, bit_size),
5520 "Cannot flush to zero and preserve denorms for the same bit size.");
5521 vtn_fail_if(nir_is_rounding_mode_rtne(b->shader->info.float_controls_execution_mode, bit_size) &&
5522 nir_is_rounding_mode_rtz(b->shader->info.float_controls_execution_mode, bit_size),
5523 "Cannot set rounding mode to RTNE and RTZ for the same bit size.");
5524 }
5525 break;
5526 }
5527
5528 case SpvExecutionModeMaximallyReconvergesKHR:
5529 b->shader->info.maximally_reconverges = true;
5530 break;
5531
5532 case SpvExecutionModeLocalSizeId:
5533 case SpvExecutionModeLocalSizeHintId:
5534 case SpvExecutionModeSubgroupsPerWorkgroupId:
5535 case SpvExecutionModeMaxNodeRecursionAMDX:
5536 case SpvExecutionModeStaticNumWorkgroupsAMDX:
5537 case SpvExecutionModeMaxNumWorkgroupsAMDX:
5538 case SpvExecutionModeShaderIndexAMDX:
5539 /* Handled later by vtn_handle_execution_mode_id(). */
5540 break;
5541
5542 case SpvExecutionModeSubgroupSize:
5543 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5544 vtn_assert(b->shader->info.subgroup_size == SUBGROUP_SIZE_VARYING);
5545 b->shader->info.subgroup_size = mode->operands[0];
5546 break;
5547
5548 case SpvExecutionModeSubgroupsPerWorkgroup:
5549 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5550 b->shader->info.num_subgroups = mode->operands[0];
5551 break;
5552
5553 case SpvExecutionModeSubgroupUniformControlFlowKHR:
5554 /* There's no corresponding SPIR-V capability, so check here. */
5555 vtn_fail_if(!b->options->caps.subgroup_uniform_control_flow,
5556 "SpvExecutionModeSubgroupUniformControlFlowKHR not supported.");
5557 break;
5558
5559 case SpvExecutionModeEarlyAndLateFragmentTestsAMD:
5560 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5561 b->shader->info.fs.early_and_late_fragment_tests = true;
5562 break;
5563
5564 case SpvExecutionModeStencilRefGreaterFrontAMD:
5565 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5566 b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_GREATER;
5567 break;
5568
5569 case SpvExecutionModeStencilRefLessFrontAMD:
5570 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5571 b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_LESS;
5572 break;
5573
5574 case SpvExecutionModeStencilRefUnchangedFrontAMD:
5575 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5576 b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_UNCHANGED;
5577 break;
5578
5579 case SpvExecutionModeStencilRefGreaterBackAMD:
5580 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5581 b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_GREATER;
5582 break;
5583
5584 case SpvExecutionModeStencilRefLessBackAMD:
5585 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5586 b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_LESS;
5587 break;
5588
5589 case SpvExecutionModeStencilRefUnchangedBackAMD:
5590 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5591 b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_UNCHANGED;
5592 break;
5593
5594 case SpvExecutionModeRequireFullQuadsKHR:
5595 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5596 b->shader->info.fs.require_full_quads = true;
5597 break;
5598
5599 case SpvExecutionModeQuadDerivativesKHR:
5600 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5601 b->shader->info.fs.quad_derivatives = true;
5602 break;
5603
5604 case SpvExecutionModeCoalescingAMDX:
5605 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5606 b->shader->info.cs.workgroup_count[0] = 1;
5607 b->shader->info.cs.workgroup_count[1] = 1;
5608 b->shader->info.cs.workgroup_count[2] = 1;
5609 break;
5610
5611 default:
5612 vtn_fail("Unhandled execution mode: %s (%u)",
5613 spirv_executionmode_to_string(mode->exec_mode),
5614 mode->exec_mode);
5615 }
5616 }
5617
5618 static void
5619 vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_point,
5620 const struct vtn_decoration *mode, UNUSED void *data)
5621 {
5622
5623 vtn_assert(b->entry_point == entry_point);
5624
5625 switch (mode->exec_mode) {
5626 case SpvExecutionModeLocalSizeId:
5627 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5628 b->shader->info.workgroup_size[0] = vtn_constant_uint(b, mode->operands[0]);
5629 b->shader->info.workgroup_size[1] = vtn_constant_uint(b, mode->operands[1]);
5630 b->shader->info.workgroup_size[2] = vtn_constant_uint(b, mode->operands[2]);
5631 } else {
5632 vtn_fail("Execution mode LocalSizeId not supported in stage %s",
5633 _mesa_shader_stage_to_string(b->shader->info.stage));
5634 }
5635 break;
5636
5637 case SpvExecutionModeLocalSizeHintId:
5638 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5639 b->shader->info.cs.workgroup_size_hint[0] = vtn_constant_uint(b, mode->operands[0]);
5640 b->shader->info.cs.workgroup_size_hint[1] = vtn_constant_uint(b, mode->operands[1]);
5641 b->shader->info.cs.workgroup_size_hint[2] = vtn_constant_uint(b, mode->operands[2]);
5642 break;
5643
5644 case SpvExecutionModeSubgroupsPerWorkgroupId:
5645 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5646 b->shader->info.num_subgroups = vtn_constant_uint(b, mode->operands[0]);
5647 break;
5648
5649 case SpvExecutionModeMaxNodeRecursionAMDX:
5650 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5651 break;
5652
5653 case SpvExecutionModeStaticNumWorkgroupsAMDX:
5654 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5655 b->shader->info.cs.workgroup_count[0] = vtn_constant_uint(b, mode->operands[0]);
5656 b->shader->info.cs.workgroup_count[1] = vtn_constant_uint(b, mode->operands[1]);
5657 b->shader->info.cs.workgroup_count[2] = vtn_constant_uint(b, mode->operands[2]);
5658 assert(b->shader->info.cs.workgroup_count[0]);
5659 assert(b->shader->info.cs.workgroup_count[1]);
5660 assert(b->shader->info.cs.workgroup_count[2]);
5661 break;
5662
5663 case SpvExecutionModeMaxNumWorkgroupsAMDX:
5664 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5665 break;
5666
5667 case SpvExecutionModeShaderIndexAMDX:
5668 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5669 b->shader->info.cs.shader_index = vtn_constant_uint(b, mode->operands[0]);
5670 break;
5671
5672 default:
5673 /* Nothing to do. Literal execution modes already handled by
5674 * vtn_handle_execution_mode(). */
5675 break;
5676 }
5677 }
5678
5679 static bool
5680 vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
5681 const uint32_t *w, unsigned count)
5682 {
5683 vtn_set_instruction_result_type(b, opcode, w, count);
5684
5685 switch (opcode) {
5686 case SpvOpSource:
5687 case SpvOpSourceContinued:
5688 case SpvOpSourceExtension:
5689 case SpvOpExtension:
5690 case SpvOpCapability:
5691 case SpvOpExtInstImport:
5692 case SpvOpMemoryModel:
5693 case SpvOpEntryPoint:
5694 case SpvOpExecutionMode:
5695 case SpvOpString:
5696 case SpvOpName:
5697 case SpvOpMemberName:
5698 case SpvOpDecorationGroup:
5699 case SpvOpDecorate:
5700 case SpvOpDecorateId:
5701 case SpvOpMemberDecorate:
5702 case SpvOpGroupDecorate:
5703 case SpvOpGroupMemberDecorate:
5704 case SpvOpDecorateString:
5705 case SpvOpMemberDecorateString:
5706 vtn_fail("Invalid opcode in types and variables section");
5707 break;
5708
5709 case SpvOpTypeVoid:
5710 case SpvOpTypeBool:
5711 case SpvOpTypeInt:
5712 case SpvOpTypeFloat:
5713 case SpvOpTypeVector:
5714 case SpvOpTypeMatrix:
5715 case SpvOpTypeImage:
5716 case SpvOpTypeSampler:
5717 case SpvOpTypeSampledImage:
5718 case SpvOpTypeArray:
5719 case SpvOpTypeRuntimeArray:
5720 case SpvOpTypeStruct:
5721 case SpvOpTypeOpaque:
5722 case SpvOpTypePointer:
5723 case SpvOpTypeForwardPointer:
5724 case SpvOpTypeFunction:
5725 case SpvOpTypeEvent:
5726 case SpvOpTypeDeviceEvent:
5727 case SpvOpTypeReserveId:
5728 case SpvOpTypeQueue:
5729 case SpvOpTypePipe:
5730 case SpvOpTypeAccelerationStructureKHR:
5731 case SpvOpTypeRayQueryKHR:
5732 case SpvOpTypeCooperativeMatrixKHR:
5733 vtn_handle_type(b, opcode, w, count);
5734 break;
5735
5736 case SpvOpConstantTrue:
5737 case SpvOpConstantFalse:
5738 case SpvOpConstant:
5739 case SpvOpConstantComposite:
5740 case SpvOpConstantNull:
5741 case SpvOpSpecConstantTrue:
5742 case SpvOpSpecConstantFalse:
5743 case SpvOpSpecConstant:
5744 case SpvOpSpecConstantComposite:
5745 case SpvOpSpecConstantOp:
5746 vtn_handle_constant(b, opcode, w, count);
5747 break;
5748
5749 case SpvOpUndef:
5750 case SpvOpVariable:
5751 case SpvOpConstantSampler:
5752 vtn_handle_variables(b, opcode, w, count);
5753 break;
5754
5755 case SpvOpExtInst: {
5756 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5757 /* NonSemantic extended instructions are acceptable in the preamble;
5758 * anything else indicates the end of the preamble.
5759 */
5760 return val->ext_handler == vtn_handle_non_semantic_instruction;
5761 }
5762
5763 default:
5764 return false; /* End of preamble */
5765 }
5766
5767 return true;
5768 }
5769
5770 static struct vtn_ssa_value *
5771 vtn_nir_select(struct vtn_builder *b, struct vtn_ssa_value *src0,
5772 struct vtn_ssa_value *src1, struct vtn_ssa_value *src2)
5773 {
5774 struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
5775 dest->type = src1->type;
5776
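/* Three cases: variable-backed values are selected with an if/else that
 * copies the chosen side into a fresh local, vectors and scalars lower to
 * a single bcsel, and other composites recurse element by element.
 */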
5777 if (src1->is_variable || src2->is_variable) {
5778 vtn_assert(src1->is_variable && src2->is_variable);
5779
5780 nir_variable *dest_var =
5781 nir_local_variable_create(b->nb.impl, dest->type, "var_select");
5782 nir_deref_instr *dest_deref = nir_build_deref_var(&b->nb, dest_var);
5783
5784 nir_push_if(&b->nb, src0->def);
5785 {
5786 nir_deref_instr *src1_deref = vtn_get_deref_for_ssa_value(b, src1);
5787 vtn_local_store(b, vtn_local_load(b, src1_deref, 0), dest_deref, 0);
5788 }
5789 nir_push_else(&b->nb, NULL);
5790 {
5791 nir_deref_instr *src2_deref = vtn_get_deref_for_ssa_value(b, src2);
5792 vtn_local_store(b, vtn_local_load(b, src2_deref, 0), dest_deref, 0);
5793 }
5794 nir_pop_if(&b->nb, NULL);
5795
5796 vtn_set_ssa_value_var(b, dest, dest_var);
5797 } else if (glsl_type_is_vector_or_scalar(src1->type)) {
5798 dest->def = nir_bcsel(&b->nb, src0->def, src1->def, src2->def);
5799 } else {
5800 unsigned elems = glsl_get_length(src1->type);
5801
5802 dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
5803 for (unsigned i = 0; i < elems; i++) {
5804 dest->elems[i] = vtn_nir_select(b, src0,
5805 src1->elems[i], src2->elems[i]);
5806 }
5807 }
5808
5809 return dest;
5810 }
5811
5812 static void
5813 vtn_handle_select(struct vtn_builder *b, SpvOp opcode,
5814 const uint32_t *w, unsigned count)
5815 {
5816 /* Handle OpSelect up-front here because it needs to be able to handle
5817 * pointers and not just regular vectors and scalars.
5818 */
5819 struct vtn_value *res_val = vtn_untyped_value(b, w[2]);
5820 struct vtn_value *cond_val = vtn_untyped_value(b, w[3]);
5821 struct vtn_value *obj1_val = vtn_untyped_value(b, w[4]);
5822 struct vtn_value *obj2_val = vtn_untyped_value(b, w[5]);
5823
5824 vtn_fail_if(obj1_val->type != res_val->type ||
5825 obj2_val->type != res_val->type,
5826 "Object types must match the result type in OpSelect (%%%u = %%%u ? %%%u : %%%u)", w[2], w[3], w[4], w[5]);
5827
5828 vtn_fail_if((cond_val->type->base_type != vtn_base_type_scalar &&
5829 cond_val->type->base_type != vtn_base_type_vector) ||
5830 !glsl_type_is_boolean(cond_val->type->type),
5831 "OpSelect must have either a vector of booleans or "
5832 "a boolean as Condition type");
5833
5834 vtn_fail_if(cond_val->type->base_type == vtn_base_type_vector &&
5835 (res_val->type->base_type != vtn_base_type_vector ||
5836 res_val->type->length != cond_val->type->length),
5837 "When Condition type in OpSelect is a vector, the Result "
5838 "type must be a vector of the same length");
5839
5840 switch (res_val->type->base_type) {
5841 case vtn_base_type_scalar:
5842 case vtn_base_type_vector:
5843 case vtn_base_type_matrix:
5844 case vtn_base_type_array:
5845 case vtn_base_type_struct:
5846 /* OK. */
5847 break;
5848 case vtn_base_type_pointer:
5849 /* We need to have actual storage for pointer types. */
5850 vtn_fail_if(res_val->type->type == NULL,
5851 "Invalid pointer result type for OpSelect");
5852 break;
5853 default:
5854 vtn_fail("Result type of OpSelect must be a scalar, composite, or pointer");
5855 }
5856
5857 vtn_push_ssa_value(b, w[2],
5858 vtn_nir_select(b, vtn_ssa_value(b, w[3]),
5859 vtn_ssa_value(b, w[4]),
5860 vtn_ssa_value(b, w[5])));
5861 }
5862
5863 static void
5864 vtn_handle_ptr(struct vtn_builder *b, SpvOp opcode,
5865 const uint32_t *w, unsigned count)
5866 {
5867 struct vtn_type *type1 = vtn_get_value_type(b, w[3]);
5868 struct vtn_type *type2 = vtn_get_value_type(b, w[4]);
5869 vtn_fail_if(type1->base_type != vtn_base_type_pointer ||
5870 type2->base_type != vtn_base_type_pointer,
5871 "%s operands must have pointer types",
5872 spirv_op_to_string(opcode));
5873 vtn_fail_if(type1->storage_class != type2->storage_class,
5874 "%s operands must have the same storage class",
5875 spirv_op_to_string(opcode));
5876
5877 struct vtn_type *vtn_type = vtn_get_type(b, w[1]);
5878 const struct glsl_type *type = vtn_type->type;
5879
5880 nir_address_format addr_format = vtn_mode_to_address_format(
5881 b, vtn_storage_class_to_mode(b, type1->storage_class, NULL, NULL));
5882
5883 nir_def *def;
5884
5885 switch (opcode) {
5886 case SpvOpPtrDiff: {
5887 /* OpPtrDiff returns the difference in number of elements (not byte offset). */
5888 unsigned elem_size, elem_align;
5889 glsl_get_natural_size_align_bytes(type1->deref->type,
5890 &elem_size, &elem_align);
5891
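/* Subtract the raw addresses, divide by the element stride, then convert
 * to the bit size of the result type. E.g. two pointers into an array
 * with a 16-byte element stride that are 48 bytes apart yield 3.
 */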
5892 def = nir_build_addr_isub(&b->nb,
5893 vtn_get_nir_ssa(b, w[3]),
5894 vtn_get_nir_ssa(b, w[4]),
5895 addr_format);
5896 def = nir_idiv(&b->nb, def, nir_imm_intN_t(&b->nb, elem_size, def->bit_size));
5897 def = nir_i2iN(&b->nb, def, glsl_get_bit_size(type));
5898 break;
5899 }
5900
5901 case SpvOpPtrEqual:
5902 case SpvOpPtrNotEqual: {
5903 def = nir_build_addr_ieq(&b->nb,
5904 vtn_get_nir_ssa(b, w[3]),
5905 vtn_get_nir_ssa(b, w[4]),
5906 addr_format);
5907 if (opcode == SpvOpPtrNotEqual)
5908 def = nir_inot(&b->nb, def);
5909 break;
5910 }
5911
5912 default:
5913 unreachable("Invalid ptr operation");
5914 }
5915
5916 vtn_push_nir_ssa(b, w[2], def);
5917 }
5918
5919 static void
5920 vtn_handle_ray_intrinsic(struct vtn_builder *b, SpvOp opcode,
5921 const uint32_t *w, unsigned count)
5922 {
5923 nir_intrinsic_instr *intrin;
5924
5925 switch (opcode) {
5926 case SpvOpTraceNV:
5927 case SpvOpTraceRayKHR: {
5928 intrin = nir_intrinsic_instr_create(b->nb.shader,
5929 nir_intrinsic_trace_ray);
5930
5931 /* The sources are in the same order in the NIR intrinsic */
5932 for (unsigned i = 0; i < 10; i++)
5933 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
5934
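/* OpTraceNV identifies the payload by location number, while
 * OpTraceRayKHR passes a pointer to it, so the payload deref is built
 * differently for the two opcodes.
 */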
5935 nir_deref_instr *payload;
5936 if (opcode == SpvOpTraceNV)
5937 payload = vtn_get_call_payload_for_location(b, w[11]);
5938 else
5939 payload = vtn_nir_deref(b, w[11]);
5940 intrin->src[10] = nir_src_for_ssa(&payload->def);
5941 nir_builder_instr_insert(&b->nb, &intrin->instr);
5942 break;
5943 }
5944
5945 case SpvOpReportIntersectionKHR: {
5946 intrin = nir_intrinsic_instr_create(b->nb.shader,
5947 nir_intrinsic_report_ray_intersection);
5948 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
5949 intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
5950 nir_def_init(&intrin->instr, &intrin->def, 1, 1);
5951 nir_builder_instr_insert(&b->nb, &intrin->instr);
5952 vtn_push_nir_ssa(b, w[2], &intrin->def);
5953 break;
5954 }
5955
5956 case SpvOpIgnoreIntersectionNV:
5957 intrin = nir_intrinsic_instr_create(b->nb.shader,
5958 nir_intrinsic_ignore_ray_intersection);
5959 nir_builder_instr_insert(&b->nb, &intrin->instr);
5960 break;
5961
5962 case SpvOpTerminateRayNV:
5963 intrin = nir_intrinsic_instr_create(b->nb.shader,
5964 nir_intrinsic_terminate_ray);
5965 nir_builder_instr_insert(&b->nb, &intrin->instr);
5966 break;
5967
5968 case SpvOpExecuteCallableNV:
5969 case SpvOpExecuteCallableKHR: {
5970 intrin = nir_intrinsic_instr_create(b->nb.shader,
5971 nir_intrinsic_execute_callable);
5972 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
5973 nir_deref_instr *payload;
5974 if (opcode == SpvOpExecuteCallableNV)
5975 payload = vtn_get_call_payload_for_location(b, w[2]);
5976 else
5977 payload = vtn_nir_deref(b, w[2]);
5978 intrin->src[1] = nir_src_for_ssa(&payload->def);
5979 nir_builder_instr_insert(&b->nb, &intrin->instr);
5980 break;
5981 }
5982
5983 default:
5984 vtn_fail_with_opcode("Unhandled opcode", opcode);
5985 }
5986 }
5987
5988 static void
5989 vtn_handle_write_packed_primitive_indices(struct vtn_builder *b, SpvOp opcode,
5990 const uint32_t *w, unsigned count)
5991 {
5992 vtn_assert(opcode == SpvOpWritePackedPrimitiveIndices4x8NV);
5993
5994 /* TODO(mesh): Use or create a primitive that allows the unpacking to
5995 * happen in the backend. What we have here is functional but too
5996 * blunt.
5997 */
5998
5999 struct vtn_type *offset_type = vtn_get_value_type(b, w[1]);
6000 vtn_fail_if(offset_type->base_type != vtn_base_type_scalar ||
6001 offset_type->type != glsl_uint_type(),
6002 "Index Offset type of OpWritePackedPrimitiveIndices4x8NV "
6003 "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
6004
6005 struct vtn_type *packed_type = vtn_get_value_type(b, w[2]);
6006 vtn_fail_if(packed_type->base_type != vtn_base_type_scalar ||
6007 packed_type->type != glsl_uint_type(),
6008 "Packed Indices type of OpWritePackedPrimitiveIndices4x8NV "
6009 "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
6010
6011 nir_deref_instr *indices = NULL;
6012 nir_foreach_variable_with_modes(var, b->nb.shader, nir_var_shader_out) {
6013 if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) {
6014 indices = nir_build_deref_var(&b->nb, var);
6015 break;
6016 }
6017 }
6018
6019 /* It may be the case that the variable is not present in the
6020 * entry point interface list.
6021 *
6022 * See https://github.com/KhronosGroup/SPIRV-Registry/issues/104.
6023 */
6024
6025 if (!indices) {
6026 unsigned vertices_per_prim =
6027 mesa_vertices_per_prim(b->shader->info.mesh.primitive_type);
6028 unsigned max_prim_indices =
6029 vertices_per_prim * b->shader->info.mesh.max_primitives_out;
6030 const struct glsl_type *t =
6031 glsl_array_type(glsl_uint_type(), max_prim_indices, 0);
6032 nir_variable *var =
6033 nir_variable_create(b->shader, nir_var_shader_out, t,
6034 "gl_PrimitiveIndicesNV");
6035
6036 var->data.location = VARYING_SLOT_PRIMITIVE_INDICES;
6037 var->data.interpolation = INTERP_MODE_NONE;
6038 indices = nir_build_deref_var(&b->nb, var);
6039 }
6040
6041 nir_def *offset = vtn_get_nir_ssa(b, w[1]);
6042 nir_def *packed = vtn_get_nir_ssa(b, w[2]);
6043 nir_def *unpacked = nir_unpack_bits(&b->nb, packed, 8);
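/* Each 32-bit word packs four 8-bit indices (lowest byte first), which
 * are stored to four consecutive elements starting at the given offset.
 * E.g. packed == 0x04030201 written at offset 8 stores 1, 2, 3, 4 into
 * elements 8..11.
 */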
6044 for (int i = 0; i < 4; i++) {
6045 nir_deref_instr *offset_deref =
6046 nir_build_deref_array(&b->nb, indices,
6047 nir_iadd_imm(&b->nb, offset, i));
6048 nir_def *val = nir_u2u32(&b->nb, nir_channel(&b->nb, unpacked, i));
6049
6050 nir_store_deref(&b->nb, offset_deref, val, 0x1);
6051 }
6052 }
6053
6054 struct ray_query_value {
6055 nir_ray_query_value nir_value;
6056 const struct glsl_type *glsl_type;
6057 };
6058
6059 static struct ray_query_value
6060 spirv_to_nir_type_ray_query_intrinsic(struct vtn_builder *b,
6061 SpvOp opcode)
6062 {
6063 switch (opcode) {
6064 #define CASE(_spv, _nir, _type) case SpvOpRayQueryGet##_spv: \
6065 return (struct ray_query_value) { .nir_value = nir_ray_query_value_##_nir, .glsl_type = _type }
6066 CASE(RayTMinKHR, tmin, glsl_floatN_t_type(32));
6067 CASE(RayFlagsKHR, flags, glsl_uint_type());
6068 CASE(WorldRayDirectionKHR, world_ray_direction, glsl_vec_type(3));
6069 CASE(WorldRayOriginKHR, world_ray_origin, glsl_vec_type(3));
6070 CASE(IntersectionTypeKHR, intersection_type, glsl_uint_type());
6071 CASE(IntersectionTKHR, intersection_t, glsl_floatN_t_type(32));
6072 CASE(IntersectionInstanceCustomIndexKHR, intersection_instance_custom_index, glsl_int_type());
6073 CASE(IntersectionInstanceIdKHR, intersection_instance_id, glsl_int_type());
6074 CASE(IntersectionInstanceShaderBindingTableRecordOffsetKHR, intersection_instance_sbt_index, glsl_uint_type());
6075 CASE(IntersectionGeometryIndexKHR, intersection_geometry_index, glsl_int_type());
6076 CASE(IntersectionPrimitiveIndexKHR, intersection_primitive_index, glsl_int_type());
6077 CASE(IntersectionBarycentricsKHR, intersection_barycentrics, glsl_vec_type(2));
6078 CASE(IntersectionFrontFaceKHR, intersection_front_face, glsl_bool_type());
6079 CASE(IntersectionCandidateAABBOpaqueKHR, intersection_candidate_aabb_opaque, glsl_bool_type());
6080 CASE(IntersectionObjectToWorldKHR, intersection_object_to_world, glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
6081 CASE(IntersectionWorldToObjectKHR, intersection_world_to_object, glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
6082 CASE(IntersectionObjectRayOriginKHR, intersection_object_ray_origin, glsl_vec_type(3));
6083 CASE(IntersectionObjectRayDirectionKHR, intersection_object_ray_direction, glsl_vec_type(3));
6084 CASE(IntersectionTriangleVertexPositionsKHR, intersection_triangle_vertex_positions, glsl_array_type(glsl_vec_type(3), 3,
6085 glsl_get_explicit_stride(glsl_vec_type(3))));
6086 #undef CASE
6087 default:
6088 vtn_fail_with_opcode("Unhandled opcode", opcode);
6089 }
6090 }
6091
6092 static void
6093 ray_query_load_intrinsic_create(struct vtn_builder *b, SpvOp opcode,
6094 const uint32_t *w, nir_def *src0,
6095 bool committed)
6096 {
6097 struct ray_query_value value =
6098 spirv_to_nir_type_ray_query_intrinsic(b, opcode);
6099
6100 if (glsl_type_is_array_or_matrix(value.glsl_type)) {
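/* Matrix and array results (e.g. the object/world transforms and the
 * triangle vertex positions) are loaded one column or element at a time
 * using the .column index.
 */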
6101 const struct glsl_type *elem_type = glsl_get_array_element(value.glsl_type);
6102 const unsigned elems = glsl_get_length(value.glsl_type);
6103
6104 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, value.glsl_type);
6105 for (unsigned i = 0; i < elems; i++) {
6106 ssa->elems[i]->def =
6107 nir_rq_load(&b->nb,
6108 glsl_get_vector_elements(elem_type),
6109 glsl_get_bit_size(elem_type),
6110 src0,
6111 .ray_query_value = value.nir_value,
6112 .committed = committed,
6113 .column = i);
6114 }
6115
6116 vtn_push_ssa_value(b, w[2], ssa);
6117 } else {
6118 assert(glsl_type_is_vector_or_scalar(value.glsl_type));
6119
6120 vtn_push_nir_ssa(b, w[2],
6121 nir_rq_load(&b->nb,
6122 glsl_get_vector_elements(value.glsl_type),
6123 glsl_get_bit_size(value.glsl_type),
6124 src0,
6125 .ray_query_value = value.nir_value,
6126 .committed = committed));
6127 }
6128 }
6129
6130 static void
6131 vtn_handle_ray_query_intrinsic(struct vtn_builder *b, SpvOp opcode,
6132 const uint32_t *w, unsigned count)
6133 {
6134 switch (opcode) {
6135 case SpvOpRayQueryInitializeKHR: {
6136 nir_intrinsic_instr *intrin =
6137 nir_intrinsic_instr_create(b->nb.shader,
6138 nir_intrinsic_rq_initialize);
6139 /* The sources are in the same order in the NIR intrinsic */
6140 for (unsigned i = 0; i < 8; i++)
6141 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
6142 nir_builder_instr_insert(&b->nb, &intrin->instr);
6143 break;
6144 }
6145
6146 case SpvOpRayQueryTerminateKHR:
6147 nir_rq_terminate(&b->nb, vtn_ssa_value(b, w[1])->def);
6148 break;
6149
6150 case SpvOpRayQueryProceedKHR:
6151 vtn_push_nir_ssa(b, w[2],
6152 nir_rq_proceed(&b->nb, 1, vtn_ssa_value(b, w[3])->def));
6153 break;
6154
6155 case SpvOpRayQueryGenerateIntersectionKHR:
6156 nir_rq_generate_intersection(&b->nb,
6157 vtn_ssa_value(b, w[1])->def,
6158 vtn_ssa_value(b, w[2])->def);
6159 break;
6160
6161 case SpvOpRayQueryConfirmIntersectionKHR:
6162 nir_rq_confirm_intersection(&b->nb, vtn_ssa_value(b, w[1])->def);
6163 break;
6164
6165 case SpvOpRayQueryGetIntersectionTKHR:
6166 case SpvOpRayQueryGetIntersectionTypeKHR:
6167 case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6168 case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6169 case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6170 case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6171 case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6172 case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6173 case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6174 case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6175 case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6176 case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6177 case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6178 case SpvOpRayQueryGetIntersectionTriangleVertexPositionsKHR:
6179 ray_query_load_intrinsic_create(b, opcode, w,
6180 vtn_ssa_value(b, w[3])->def,
6181 vtn_constant_uint(b, w[4]));
6182 break;
6183
6184 case SpvOpRayQueryGetRayTMinKHR:
6185 case SpvOpRayQueryGetRayFlagsKHR:
6186 case SpvOpRayQueryGetWorldRayDirectionKHR:
6187 case SpvOpRayQueryGetWorldRayOriginKHR:
6188 case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6189 ray_query_load_intrinsic_create(b, opcode, w,
6190 vtn_ssa_value(b, w[3])->def,
6191 /* Committed value is ignored for these */
6192 false);
6193 break;
6194
6195 default:
6196 vtn_fail_with_opcode("Unhandled opcode", opcode);
6197 }
6198 }
6199
6200 static void
6201 vtn_handle_initialize_node_payloads(struct vtn_builder *b, SpvOp opcode,
6202 const uint32_t *w, unsigned count)
6203 {
6204 vtn_assert(opcode == SpvOpInitializeNodePayloadsAMDX);
6205
6206 nir_def *payloads = vtn_ssa_value(b, w[1])->def;
6207 mesa_scope scope = vtn_translate_scope(b, vtn_constant_uint(b, w[2]));
6208 nir_def *payload_count = vtn_ssa_value(b, w[3])->def;
6209 nir_def *node_index = vtn_ssa_value(b, w[4])->def;
6210
6211 nir_initialize_node_payloads(&b->nb, payloads, payload_count, node_index, .execution_scope = scope);
6212 }
6213
6214 static bool
6215 vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
6216 const uint32_t *w, unsigned count)
6217 {
6218 switch (opcode) {
6219 case SpvOpLabel:
6220 break;
6221
6222 case SpvOpLoopMerge:
6223 case SpvOpSelectionMerge:
6224 /* This is handled by cfg pre-pass and walk_blocks */
6225 break;
6226
6227 case SpvOpUndef: {
6228 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
6229 val->type = vtn_get_type(b, w[1]);
6230 break;
6231 }
6232
6233 case SpvOpExtInst:
6234 vtn_handle_extension(b, opcode, w, count);
6235 break;
6236
6237 case SpvOpVariable:
6238 case SpvOpLoad:
6239 case SpvOpStore:
6240 case SpvOpCopyMemory:
6241 case SpvOpCopyMemorySized:
6242 case SpvOpAccessChain:
6243 case SpvOpPtrAccessChain:
6244 case SpvOpInBoundsAccessChain:
6245 case SpvOpInBoundsPtrAccessChain:
6246 case SpvOpArrayLength:
6247 case SpvOpConvertPtrToU:
6248 case SpvOpConvertUToPtr:
6249 case SpvOpGenericCastToPtrExplicit:
6250 case SpvOpGenericPtrMemSemantics:
6251 case SpvOpSubgroupBlockReadINTEL:
6252 case SpvOpSubgroupBlockWriteINTEL:
6253 case SpvOpConvertUToAccelerationStructureKHR:
6254 vtn_handle_variables(b, opcode, w, count);
6255 break;
6256
6257 case SpvOpFunctionCall:
6258 vtn_handle_function_call(b, opcode, w, count);
6259 break;
6260
6261 case SpvOpSampledImage:
6262 case SpvOpImage:
6263 case SpvOpImageSparseTexelsResident:
6264 case SpvOpImageSampleImplicitLod:
6265 case SpvOpImageSparseSampleImplicitLod:
6266 case SpvOpImageSampleExplicitLod:
6267 case SpvOpImageSparseSampleExplicitLod:
6268 case SpvOpImageSampleDrefImplicitLod:
6269 case SpvOpImageSparseSampleDrefImplicitLod:
6270 case SpvOpImageSampleDrefExplicitLod:
6271 case SpvOpImageSparseSampleDrefExplicitLod:
6272 case SpvOpImageSampleProjImplicitLod:
6273 case SpvOpImageSampleProjExplicitLod:
6274 case SpvOpImageSampleProjDrefImplicitLod:
6275 case SpvOpImageSampleProjDrefExplicitLod:
6276 case SpvOpImageFetch:
6277 case SpvOpImageSparseFetch:
6278 case SpvOpImageGather:
6279 case SpvOpImageSparseGather:
6280 case SpvOpImageDrefGather:
6281 case SpvOpImageSparseDrefGather:
6282 case SpvOpImageQueryLod:
6283 case SpvOpImageQueryLevels:
6284 vtn_handle_texture(b, opcode, w, count);
6285 break;
6286
6287 case SpvOpImageRead:
6288 case SpvOpImageSparseRead:
6289 case SpvOpImageWrite:
6290 case SpvOpImageTexelPointer:
6291 case SpvOpImageQueryFormat:
6292 case SpvOpImageQueryOrder:
6293 vtn_handle_image(b, opcode, w, count);
6294 break;
6295
6296 case SpvOpImageQuerySamples:
6297 case SpvOpImageQuerySizeLod:
6298 case SpvOpImageQuerySize: {
6299 struct vtn_type *image_type = vtn_get_value_type(b, w[3]);
6300 vtn_assert(image_type->base_type == vtn_base_type_image);
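/* These queries are shared between storage images and sampled textures;
 * dispatch on the underlying GLSL type.
 */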
6301 if (glsl_type_is_image(image_type->glsl_image)) {
6302 vtn_handle_image(b, opcode, w, count);
6303 } else {
6304 vtn_assert(glsl_type_is_texture(image_type->glsl_image));
6305 vtn_handle_texture(b, opcode, w, count);
6306 }
6307 break;
6308 }
6309
6310 case SpvOpFragmentMaskFetchAMD:
6311 case SpvOpFragmentFetchAMD:
6312 vtn_handle_texture(b, opcode, w, count);
6313 break;
6314
6315 case SpvOpAtomicLoad:
6316 case SpvOpAtomicExchange:
6317 case SpvOpAtomicCompareExchange:
6318 case SpvOpAtomicCompareExchangeWeak:
6319 case SpvOpAtomicIIncrement:
6320 case SpvOpAtomicIDecrement:
6321 case SpvOpAtomicIAdd:
6322 case SpvOpAtomicISub:
6323 case SpvOpAtomicSMin:
6324 case SpvOpAtomicUMin:
6325 case SpvOpAtomicSMax:
6326 case SpvOpAtomicUMax:
6327 case SpvOpAtomicAnd:
6328 case SpvOpAtomicOr:
6329 case SpvOpAtomicXor:
6330 case SpvOpAtomicFAddEXT:
6331 case SpvOpAtomicFMinEXT:
6332 case SpvOpAtomicFMaxEXT:
6333 case SpvOpAtomicFlagTestAndSet: {
6334 struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
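/* Atomics on image texel pointers are lowered as image operations;
 * everything else goes through the generic atomics path.
 */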
6335 if (pointer->value_type == vtn_value_type_image_pointer) {
6336 vtn_handle_image(b, opcode, w, count);
6337 } else {
6338 vtn_assert(pointer->value_type == vtn_value_type_pointer);
6339 vtn_handle_atomics(b, opcode, w, count);
6340 }
6341 break;
6342 }
6343
6344 case SpvOpAtomicStore:
6345 case SpvOpAtomicFlagClear: {
6346 struct vtn_value *pointer = vtn_untyped_value(b, w[1]);
6347 if (pointer->value_type == vtn_value_type_image_pointer) {
6348 vtn_handle_image(b, opcode, w, count);
6349 } else {
6350 vtn_assert(pointer->value_type == vtn_value_type_pointer);
6351 vtn_handle_atomics(b, opcode, w, count);
6352 }
6353 break;
6354 }
6355
6356 case SpvOpSelect:
6357 vtn_handle_select(b, opcode, w, count);
6358 break;
6359
6360 case SpvOpSNegate:
6361 case SpvOpFNegate:
6362 case SpvOpNot:
6363 case SpvOpAny:
6364 case SpvOpAll:
6365 case SpvOpConvertFToU:
6366 case SpvOpConvertFToS:
6367 case SpvOpConvertSToF:
6368 case SpvOpConvertUToF:
6369 case SpvOpUConvert:
6370 case SpvOpSConvert:
6371 case SpvOpFConvert:
6372 case SpvOpQuantizeToF16:
6373 case SpvOpSatConvertSToU:
6374 case SpvOpSatConvertUToS:
6375 case SpvOpPtrCastToGeneric:
6376 case SpvOpGenericCastToPtr:
6377 case SpvOpIsNan:
6378 case SpvOpIsInf:
6379 case SpvOpIsFinite:
6380 case SpvOpIsNormal:
6381 case SpvOpSignBitSet:
6382 case SpvOpLessOrGreater:
6383 case SpvOpOrdered:
6384 case SpvOpUnordered:
6385 case SpvOpIAdd:
6386 case SpvOpFAdd:
6387 case SpvOpISub:
6388 case SpvOpFSub:
6389 case SpvOpIMul:
6390 case SpvOpFMul:
6391 case SpvOpUDiv:
6392 case SpvOpSDiv:
6393 case SpvOpFDiv:
6394 case SpvOpUMod:
6395 case SpvOpSRem:
6396 case SpvOpSMod:
6397 case SpvOpFRem:
6398 case SpvOpFMod:
6399 case SpvOpVectorTimesScalar:
6400 case SpvOpDot:
6401 case SpvOpIAddCarry:
6402 case SpvOpISubBorrow:
6403 case SpvOpUMulExtended:
6404 case SpvOpSMulExtended:
6405 case SpvOpShiftRightLogical:
6406 case SpvOpShiftRightArithmetic:
6407 case SpvOpShiftLeftLogical:
6408 case SpvOpLogicalEqual:
6409 case SpvOpLogicalNotEqual:
6410 case SpvOpLogicalOr:
6411 case SpvOpLogicalAnd:
6412 case SpvOpLogicalNot:
6413 case SpvOpBitwiseOr:
6414 case SpvOpBitwiseXor:
6415 case SpvOpBitwiseAnd:
6416 case SpvOpIEqual:
6417 case SpvOpFOrdEqual:
6418 case SpvOpFUnordEqual:
6419 case SpvOpINotEqual:
6420 case SpvOpFOrdNotEqual:
6421 case SpvOpFUnordNotEqual:
6422 case SpvOpULessThan:
6423 case SpvOpSLessThan:
6424 case SpvOpFOrdLessThan:
6425 case SpvOpFUnordLessThan:
6426 case SpvOpUGreaterThan:
6427 case SpvOpSGreaterThan:
6428 case SpvOpFOrdGreaterThan:
6429 case SpvOpFUnordGreaterThan:
6430 case SpvOpULessThanEqual:
6431 case SpvOpSLessThanEqual:
6432 case SpvOpFOrdLessThanEqual:
6433 case SpvOpFUnordLessThanEqual:
6434 case SpvOpUGreaterThanEqual:
6435 case SpvOpSGreaterThanEqual:
6436 case SpvOpFOrdGreaterThanEqual:
6437 case SpvOpFUnordGreaterThanEqual:
6438 case SpvOpDPdx:
6439 case SpvOpDPdy:
6440 case SpvOpFwidth:
6441 case SpvOpDPdxFine:
6442 case SpvOpDPdyFine:
6443 case SpvOpFwidthFine:
6444 case SpvOpDPdxCoarse:
6445 case SpvOpDPdyCoarse:
6446 case SpvOpFwidthCoarse:
6447 case SpvOpBitFieldInsert:
6448 case SpvOpBitFieldSExtract:
6449 case SpvOpBitFieldUExtract:
6450 case SpvOpBitReverse:
6451 case SpvOpBitCount:
6452 case SpvOpTranspose:
6453 case SpvOpOuterProduct:
6454 case SpvOpMatrixTimesScalar:
6455 case SpvOpVectorTimesMatrix:
6456 case SpvOpMatrixTimesVector:
6457 case SpvOpMatrixTimesMatrix:
6458 case SpvOpUCountLeadingZerosINTEL:
6459 case SpvOpUCountTrailingZerosINTEL:
6460 case SpvOpAbsISubINTEL:
6461 case SpvOpAbsUSubINTEL:
6462 case SpvOpIAddSatINTEL:
6463 case SpvOpUAddSatINTEL:
6464 case SpvOpIAverageINTEL:
6465 case SpvOpUAverageINTEL:
6466 case SpvOpIAverageRoundedINTEL:
6467 case SpvOpUAverageRoundedINTEL:
6468 case SpvOpISubSatINTEL:
6469 case SpvOpUSubSatINTEL:
6470 case SpvOpIMul32x16INTEL:
6471 case SpvOpUMul32x16INTEL:
6472 vtn_handle_alu(b, opcode, w, count);
6473 break;
6474
6475 case SpvOpSDotKHR:
6476 case SpvOpUDotKHR:
6477 case SpvOpSUDotKHR:
6478 case SpvOpSDotAccSatKHR:
6479 case SpvOpUDotAccSatKHR:
6480 case SpvOpSUDotAccSatKHR:
6481 vtn_handle_integer_dot(b, opcode, w, count);
6482 break;
6483
6484 case SpvOpBitcast:
6485 vtn_handle_bitcast(b, w, count);
6486 break;
6487
6488 /* TODO: One day, we should probably do something with this information.
6489 * For now, though, it's safe to implement them as no-ops.
6490 * Needed for Rusticl SYCL support.
6491 */
6492 case SpvOpAssumeTrueKHR:
6493 break;
6494
6495 case SpvOpExpectKHR:
6496 case SpvOpVectorExtractDynamic:
6497 case SpvOpVectorInsertDynamic:
6498 case SpvOpVectorShuffle:
6499 case SpvOpCompositeConstruct:
6500 case SpvOpCompositeExtract:
6501 case SpvOpCompositeInsert:
6502 case SpvOpCopyLogical:
6503 case SpvOpCopyObject:
6504 vtn_handle_composite(b, opcode, w, count);
6505 break;
6506
6507 case SpvOpEmitVertex:
6508 case SpvOpEndPrimitive:
6509 case SpvOpEmitStreamVertex:
6510 case SpvOpEndStreamPrimitive:
6511 case SpvOpControlBarrier:
6512 case SpvOpMemoryBarrier:
6513 vtn_handle_barrier(b, opcode, w, count);
6514 break;
6515
6516 case SpvOpGroupNonUniformElect:
6517 case SpvOpGroupNonUniformAll:
6518 case SpvOpGroupNonUniformAny:
6519 case SpvOpGroupNonUniformAllEqual:
6520 case SpvOpGroupNonUniformBroadcast:
6521 case SpvOpGroupNonUniformBroadcastFirst:
6522 case SpvOpGroupNonUniformBallot:
6523 case SpvOpGroupNonUniformInverseBallot:
6524 case SpvOpGroupNonUniformBallotBitExtract:
6525 case SpvOpGroupNonUniformBallotBitCount:
6526 case SpvOpGroupNonUniformBallotFindLSB:
6527 case SpvOpGroupNonUniformBallotFindMSB:
6528 case SpvOpGroupNonUniformShuffle:
6529 case SpvOpGroupNonUniformShuffleXor:
6530 case SpvOpGroupNonUniformShuffleUp:
6531 case SpvOpGroupNonUniformShuffleDown:
6532 case SpvOpGroupNonUniformIAdd:
6533 case SpvOpGroupNonUniformFAdd:
6534 case SpvOpGroupNonUniformIMul:
6535 case SpvOpGroupNonUniformFMul:
6536 case SpvOpGroupNonUniformSMin:
6537 case SpvOpGroupNonUniformUMin:
6538 case SpvOpGroupNonUniformFMin:
6539 case SpvOpGroupNonUniformSMax:
6540 case SpvOpGroupNonUniformUMax:
6541 case SpvOpGroupNonUniformFMax:
6542 case SpvOpGroupNonUniformBitwiseAnd:
6543 case SpvOpGroupNonUniformBitwiseOr:
6544 case SpvOpGroupNonUniformBitwiseXor:
6545 case SpvOpGroupNonUniformLogicalAnd:
6546 case SpvOpGroupNonUniformLogicalOr:
6547 case SpvOpGroupNonUniformLogicalXor:
6548 case SpvOpGroupNonUniformQuadBroadcast:
6549 case SpvOpGroupNonUniformQuadSwap:
6550 case SpvOpGroupNonUniformQuadAllKHR:
6551 case SpvOpGroupNonUniformQuadAnyKHR:
6552 case SpvOpGroupAll:
6553 case SpvOpGroupAny:
6554 case SpvOpGroupBroadcast:
6555 case SpvOpGroupIAdd:
6556 case SpvOpGroupFAdd:
6557 case SpvOpGroupFMin:
6558 case SpvOpGroupUMin:
6559 case SpvOpGroupSMin:
6560 case SpvOpGroupFMax:
6561 case SpvOpGroupUMax:
6562 case SpvOpGroupSMax:
6563 case SpvOpSubgroupBallotKHR:
6564 case SpvOpSubgroupFirstInvocationKHR:
6565 case SpvOpSubgroupReadInvocationKHR:
6566 case SpvOpSubgroupAllKHR:
6567 case SpvOpSubgroupAnyKHR:
6568 case SpvOpSubgroupAllEqualKHR:
6569 case SpvOpGroupIAddNonUniformAMD:
6570 case SpvOpGroupFAddNonUniformAMD:
6571 case SpvOpGroupFMinNonUniformAMD:
6572 case SpvOpGroupUMinNonUniformAMD:
6573 case SpvOpGroupSMinNonUniformAMD:
6574 case SpvOpGroupFMaxNonUniformAMD:
6575 case SpvOpGroupUMaxNonUniformAMD:
6576 case SpvOpGroupSMaxNonUniformAMD:
6577 case SpvOpSubgroupShuffleINTEL:
6578 case SpvOpSubgroupShuffleDownINTEL:
6579 case SpvOpSubgroupShuffleUpINTEL:
6580 case SpvOpSubgroupShuffleXorINTEL:
6581 case SpvOpGroupNonUniformRotateKHR:
6582 vtn_handle_subgroup(b, opcode, w, count);
6583 break;
6584
6585 case SpvOpPtrDiff:
6586 case SpvOpPtrEqual:
6587 case SpvOpPtrNotEqual:
6588 vtn_handle_ptr(b, opcode, w, count);
6589 break;
6590
6591 case SpvOpBeginInvocationInterlockEXT:
6592 nir_begin_invocation_interlock(&b->nb);
6593 break;
6594
6595 case SpvOpEndInvocationInterlockEXT:
6596 nir_end_invocation_interlock(&b->nb);
6597 break;
6598
6599 case SpvOpDemoteToHelperInvocation: {
6600 nir_demote(&b->nb);
6601 break;
6602 }
6603
6604 case SpvOpIsHelperInvocationEXT: {
6605 vtn_push_nir_ssa(b, w[2], nir_is_helper_invocation(&b->nb, 1));
6606 break;
6607 }
6608
6609 case SpvOpReadClockKHR: {
6610 SpvScope scope = vtn_constant_uint(b, w[3]);
6611 vtn_fail_if(scope != SpvScopeDevice && scope != SpvScopeSubgroup,
6612 "OpReadClockKHR Scope must be either "
6613 "ScopeDevice or ScopeSubgroup.");
6614
6615 /* Operation supports two result types: uvec2 and uint64_t. The NIR
6616 * intrinsic gives uvec2, so pack the result for the other case.
6617 */
6618 nir_def *result = nir_shader_clock(&b->nb, vtn_translate_scope(b, scope));
6619
6620 struct vtn_type *type = vtn_get_type(b, w[1]);
6621 const struct glsl_type *dest_type = type->type;
6622
6623 if (glsl_type_is_vector(dest_type)) {
6624 assert(dest_type == glsl_vector_type(GLSL_TYPE_UINT, 2));
6625 } else {
6626 assert(glsl_type_is_scalar(dest_type));
6627 assert(glsl_get_base_type(dest_type) == GLSL_TYPE_UINT64);
6628 result = nir_pack_64_2x32(&b->nb, result);
6629 }
6630
6631 vtn_push_nir_ssa(b, w[2], result);
6632 break;
6633 }
6634
6635 case SpvOpTraceNV:
6636 case SpvOpTraceRayKHR:
6637 case SpvOpReportIntersectionKHR:
6638 case SpvOpIgnoreIntersectionNV:
6639 case SpvOpTerminateRayNV:
6640 case SpvOpExecuteCallableNV:
6641 case SpvOpExecuteCallableKHR:
6642 vtn_handle_ray_intrinsic(b, opcode, w, count);
6643 break;
6644
6645 case SpvOpRayQueryInitializeKHR:
6646 case SpvOpRayQueryTerminateKHR:
6647 case SpvOpRayQueryGenerateIntersectionKHR:
6648 case SpvOpRayQueryConfirmIntersectionKHR:
6649 case SpvOpRayQueryProceedKHR:
6650 case SpvOpRayQueryGetIntersectionTypeKHR:
6651 case SpvOpRayQueryGetRayTMinKHR:
6652 case SpvOpRayQueryGetRayFlagsKHR:
6653 case SpvOpRayQueryGetIntersectionTKHR:
6654 case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6655 case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6656 case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6657 case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6658 case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6659 case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6660 case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6661 case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6662 case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6663 case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6664 case SpvOpRayQueryGetWorldRayDirectionKHR:
6665 case SpvOpRayQueryGetWorldRayOriginKHR:
6666 case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6667 case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6668 case SpvOpRayQueryGetIntersectionTriangleVertexPositionsKHR:
6669 vtn_handle_ray_query_intrinsic(b, opcode, w, count);
6670 break;
6671
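   /* Lifetime markers are only optimization hints, so it is safe to ignore
    * them.
    */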
6672 case SpvOpLifetimeStart:
6673 case SpvOpLifetimeStop:
6674 break;
6675
6676 case SpvOpGroupAsyncCopy:
6677 case SpvOpGroupWaitEvents:
6678 vtn_handle_opencl_core_instruction(b, opcode, w, count);
6679 break;
6680
6681 case SpvOpWritePackedPrimitiveIndices4x8NV:
6682 vtn_handle_write_packed_primitive_indices(b, opcode, w, count);
6683 break;
6684
6685 case SpvOpSetMeshOutputsEXT:
6686 nir_set_vertex_and_primitive_count(
6687 &b->nb, vtn_get_nir_ssa(b, w[1]), vtn_get_nir_ssa(b, w[2]),
6688 nir_undef(&b->nb, 1, 32));
6689 break;
6690
6691 case SpvOpInitializeNodePayloadsAMDX:
6692 vtn_handle_initialize_node_payloads(b, opcode, w, count);
6693 break;
6694
6695 case SpvOpFinalizeNodePayloadsAMDX:
6696 break;
6697
6698 case SpvOpFinishWritingNodePayloadAMDX:
6699 break;
6700
6701 case SpvOpCooperativeMatrixLoadKHR:
6702 case SpvOpCooperativeMatrixStoreKHR:
6703 case SpvOpCooperativeMatrixLengthKHR:
6704 case SpvOpCooperativeMatrixMulAddKHR:
6705 vtn_handle_cooperative_instruction(b, opcode, w, count);
6706 break;
6707
6708 default:
6709 vtn_fail_with_opcode("Unhandled opcode", opcode);
6710 }
6711
6712 return true;
6713 }
6714
6715 static bool
6716 is_glslang(const struct vtn_builder *b)
6717 {
6718 return b->generator_id == vtn_generator_glslang_reference_front_end ||
6719 b->generator_id == vtn_generator_shaderc_over_glslang;
6720 }
6721
6722 struct vtn_builder*
6723 vtn_create_builder(const uint32_t *words, size_t word_count,
6724 gl_shader_stage stage, const char *entry_point_name,
6725 const struct spirv_to_nir_options *options)
6726 {
6727 /* Initialize the vtn_builder object */
6728 struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
6729
6730 b->spirv = words;
6731 b->spirv_word_count = word_count;
6732 b->file = NULL;
6733 b->line = -1;
6734 b->col = -1;
6735 list_inithead(&b->functions);
6736 b->entry_point_stage = stage;
6737 b->entry_point_name = entry_point_name;
6738
6739 /*
6740 * Handle the SPIR-V header (first 5 dwords).
6741 * Can't use vtn_assert() as the setjmp(3) target isn't initialized yet.
6742 */
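   /* The header words are: magic number, version, generator id/version,
    * value <id> bound, and a reserved schema word that must be zero.
    */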
6743 if (word_count <= 5)
6744 goto fail;
6745
6746 if (words[0] != SpvMagicNumber) {
6747 vtn_err("words[0] was 0x%x, want 0x%x", words[0], SpvMagicNumber);
6748 goto fail;
6749 }
6750
6751 b->version = words[1];
6752 if (b->version < 0x10000) {
6753 vtn_err("version was 0x%x, want >= 0x10000", b->version);
6754 goto fail;
6755 }
6756
6757 b->generator_id = words[2] >> 16;
6758 uint16_t generator_version = words[2];
6759
6760 unsigned value_id_bound = words[3];
6761 if (words[4] != 0) {
6762 vtn_err("words[4] was %u, want 0", words[4]);
6763 goto fail;
6764 }
6765
6766 b->value_id_bound = value_id_bound;
6767
6768 /* Allocate all the data that can be dropped after parsing using
6769 * a cheaper allocation strategy. Use the value_id_bound and the
6770 * size of the common internal structs to approximate a good
6771 * buffer_size.
6772 */
6773 const linear_opts lin_opts = {
6774 .min_buffer_size = 2 * value_id_bound * (sizeof(struct vtn_value) +
6775 sizeof(struct vtn_ssa_value)),
6776 };
6777 b->lin_ctx = linear_context_with_opts(b, &lin_opts);
6778
6779 struct spirv_to_nir_options *dup_options =
6780 vtn_alloc(b, struct spirv_to_nir_options);
6781 *dup_options = *options;
6782
6783 b->options = dup_options;
6784 b->values = vtn_zalloc_array(b, struct vtn_value, value_id_bound);
6785
6786
6787 /* In GLSLang commit 8297936dd6eb3, their handling of barrier() was fixed
6788 * to provide correct memory semantics on compute shader barrier()
6789 * commands. Prior to that, we need to fix them up ourselves. This
6790 * GLSLang fix caused them to bump to generator version 3.
6791 */
6792 b->wa_glslang_cs_barrier = is_glslang(b) && generator_version < 3;
6793
6794 /* Identifying the LLVM-SPIRV translator:
6795 *
6796 * The LLVM-SPIRV translator currently doesn't store any generator ID [1].
6797 * Our use case involving the SPIRV-Tools linker also means we want to check
6798 * for that tool instead. Finally, the SPIRV-Tools linker also stores its
6799 * generator ID in the wrong location [2].
6800 *
6801 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/1223
6802 * [2] : https://github.com/KhronosGroup/SPIRV-Tools/pull/4549
6803 */
6804 const bool is_llvm_spirv_translator =
6805 (b->generator_id == 0 &&
6806 generator_version == vtn_generator_spirv_tools_linker) ||
6807 b->generator_id == vtn_generator_spirv_tools_linker;
6808
6809 /* The LLVM-SPIRV translator generates Undef initializers for _local
6810 * variables [1].
6811 *
6812 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1224
6813 */
6814 b->wa_llvm_spirv_ignore_workgroup_initializer =
6815 b->options->environment == NIR_SPIRV_OPENCL && is_llvm_spirv_translator;
6816
6817 /* Older versions of GLSLang would incorrectly emit OpReturn after
6818 * OpEmitMeshTasksEXT, even though the latter is already
6819 * a terminator instruction.
6820 *
6821 * See https://github.com/KhronosGroup/glslang/issues/3020 for details.
6822 *
6823 * Clay Shader Compiler (used by GravityMark) is also affected.
6824 */
6825 b->wa_ignore_return_after_emit_mesh_tasks =
6826 (is_glslang(b) && generator_version < 11) ||
6827 (b->generator_id == vtn_generator_clay_shader_compiler &&
6828 generator_version < 18);
6829
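   /* Variables recorded in this set must survive the dead-variable clean-up
    * performed at the end of spirv_to_nir() (see can_remove()).
    */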
6833 if (b->options->environment == NIR_SPIRV_VULKAN)
6834 b->vars_used_indirectly = _mesa_pointer_set_create(b);
6835
6836 return b;
6837 fail:
6838 ralloc_free(b);
6839 return NULL;
6840 }
6841
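/* OpenCL kernel entry points take their arguments as function parameters,
 * which NIR callers cannot provide directly. Wrap the entry point in a new
 * function that declares one input variable per parameter, loads it (or
 * passes a deref for images, samplers and by-value Function pointers), and
 * then calls the original entry point.
 */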
6842 static nir_function *
6843 vtn_emit_kernel_entry_point_wrapper(struct vtn_builder *b,
6844 nir_function *entry_point)
6845 {
6846 vtn_assert(entry_point == b->entry_point->func->nir_func);
6847 vtn_fail_if(!entry_point->name, "entry points are required to have a name");
6848 const char *func_name =
6849 ralloc_asprintf(b->shader, "__wrapped_%s", entry_point->name);
6850
6851 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
6852
6853 nir_function *main_entry_point = nir_function_create(b->shader, func_name);
6854 nir_function_impl *impl = nir_function_impl_create(main_entry_point);
6855 b->nb = nir_builder_at(nir_after_impl(impl));
6856 b->func_param_idx = 0;
6857
6858 nir_call_instr *call = nir_call_instr_create(b->nb.shader, entry_point);
6859
6860 for (unsigned i = 0; i < entry_point->num_params; ++i) {
6861 struct vtn_type *param_type = b->entry_point->func->type->params[i];
6862
6863 b->shader->info.cs.has_variable_shared_mem |=
6864 param_type->storage_class == SpvStorageClassWorkgroup;
6865
6866 /* consider all pointers to function memory to be parameters passed
6867 * by value
6868 */
6869 bool is_by_val = param_type->base_type == vtn_base_type_pointer &&
6870 param_type->storage_class == SpvStorageClassFunction;
6871
6872 /* input variable */
6873 nir_variable *in_var = rzalloc(b->nb.shader, nir_variable);
6874
6875 if (is_by_val) {
6876 in_var->data.mode = nir_var_uniform;
6877 in_var->type = param_type->deref->type;
6878 } else if (param_type->base_type == vtn_base_type_image) {
6879 in_var->data.mode = nir_var_image;
6880 in_var->type = param_type->glsl_image;
6881 in_var->data.access =
6882 spirv_to_gl_access_qualifier(b, param_type->access_qualifier);
6883 } else if (param_type->base_type == vtn_base_type_sampler) {
6884 in_var->data.mode = nir_var_uniform;
6885 in_var->type = glsl_bare_sampler_type();
6886 } else {
6887 in_var->data.mode = nir_var_uniform;
6888 in_var->type = param_type->type;
6889 }
6890
6891 in_var->data.read_only = true;
6892 in_var->data.location = i;
6893
6894 nir_shader_add_variable(b->nb.shader, in_var);
6895
6896 /* we have to copy the entire variable into function memory */
6897 if (is_by_val) {
6898 nir_variable *copy_var =
6899 nir_local_variable_create(impl, in_var->type, "copy_in");
6900 nir_copy_var(&b->nb, copy_var, in_var);
6901 call->params[i] =
6902 nir_src_for_ssa(&nir_build_deref_var(&b->nb, copy_var)->def);
6903 } else if (param_type->base_type == vtn_base_type_image ||
6904 param_type->base_type == vtn_base_type_sampler) {
6905 /* Don't load the var, just pass a deref of it */
6906 call->params[i] = nir_src_for_ssa(&nir_build_deref_var(&b->nb, in_var)->def);
6907 } else {
6908 call->params[i] = nir_src_for_ssa(nir_load_var(&b->nb, in_var));
6909 }
6910 }
6911
6912 nir_builder_instr_insert(&b->nb, &call->instr);
6913
6914 return main_entry_point;
6915 }
6916
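/* Callback for nir_remove_dead_variables(): only variables that were not
 * recorded as used indirectly may be removed.
 */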
6917 static bool
6918 can_remove(nir_variable *var, void *data)
6919 {
6920 const struct set *vars_used_indirectly = data;
6921 return !_mesa_set_search(vars_used_indirectly, var);
6922 }
6923
6924 #ifndef NDEBUG
6925 static void
6926 initialize_mesa_spirv_debug(void)
6927 {
6928 mesa_spirv_debug = debug_get_option_mesa_spirv_debug();
6929 }
6930 #endif
6931
6932 nir_shader *
6933 spirv_to_nir(const uint32_t *words, size_t word_count,
6934 struct nir_spirv_specialization *spec, unsigned num_spec,
6935 gl_shader_stage stage, const char *entry_point_name,
6936 const struct spirv_to_nir_options *options,
6937 const nir_shader_compiler_options *nir_options)
6939 {
6940 #ifndef NDEBUG
6941 static once_flag initialized_debug_flag = ONCE_FLAG_INIT;
6942 call_once(&initialized_debug_flag, initialize_mesa_spirv_debug);
6943 #endif
6944
6945 const uint32_t *word_end = words + word_count;
6946
6947 struct vtn_builder *b = vtn_create_builder(words, word_count,
6948 stage, entry_point_name,
6949 options);
6950
6951 if (b == NULL)
6952 return NULL;
6953
6954 /* See also _vtn_fail() */
6955 if (vtn_setjmp(b->fail_jump)) {
6956 ralloc_free(b);
6957 return NULL;
6958 }
6959
6960 const char *dump_path = secure_getenv("MESA_SPIRV_DUMP_PATH");
6961 if (dump_path)
6962 vtn_dump_shader(b, dump_path, "spirv");
6963
6964 b->shader = nir_shader_create(b, stage, nir_options, NULL);
6965 b->shader->info.subgroup_size = options->subgroup_size;
6966 b->shader->info.float_controls_execution_mode = options->float_controls_execution_mode;
6967 b->shader->info.cs.shader_index = options->shader_index;
6968 _mesa_sha1_compute(words, word_count * sizeof(uint32_t), b->shader->info.source_sha1);
6969
6970 /* Skip the SPIR-V header, handled at vtn_create_builder */
6971 words += 5;
6972
6973 /* Handle all the preamble instructions */
6974 words = vtn_foreach_instruction(b, words, word_end,
6975 vtn_handle_preamble_instruction);
6976
6977 /* DirectXShaderCompiler and glslang/shaderc both create OpKill from HLSL's
6978 * discard/clip, which uses demote semantics. DirectXShaderCompiler will use
6979 * demote if the extension is enabled, so we disable this workaround in that
6980 * case.
6981 *
6982 * Related glslang issue: https://github.com/KhronosGroup/glslang/issues/2416
6983 */
6984 bool dxsc = b->generator_id == vtn_generator_spiregg;
6985 b->convert_discard_to_demote = ((dxsc && !b->uses_demote_to_helper_invocation) ||
6986 (is_glslang(b) && b->source_lang == SpvSourceLanguageHLSL)) &&
6987 options->caps.demote_to_helper_invocation;
6988
6989 if (!options->create_library && b->entry_point == NULL) {
6990 vtn_fail("Entry point not found for %s shader \"%s\"",
6991 _mesa_shader_stage_to_string(stage), entry_point_name);
6992 ralloc_free(b);
6993 return NULL;
6994 }
6995
6996 /* Ensure a sane address mode is being used for function temps */
6997 assert(nir_address_format_bit_size(b->options->temp_addr_format) == nir_get_ptr_bitsize(b->shader));
6998 assert(nir_address_format_num_components(b->options->temp_addr_format) == 1);
6999
7000 /* Set shader info defaults */
7001 if (stage == MESA_SHADER_GEOMETRY)
7002 b->shader->info.gs.invocations = 1;
7003
7004 /* Parse execution modes. */
7005 if (!options->create_library)
7006 vtn_foreach_execution_mode(b, b->entry_point,
7007 vtn_handle_execution_mode, NULL);
7008
7009 b->specializations = spec;
7010 b->num_specializations = num_spec;
7011
7012 /* Handle all variable, type, and constant instructions */
7013 words = vtn_foreach_instruction(b, words, word_end,
7014 vtn_handle_variable_or_type_instruction);
7015
7016 /* Parse execution modes that depend on IDs. Must happen after we have
7017 * constants parsed.
7018 */
7019 if (!options->create_library)
7020 vtn_foreach_execution_mode(b, b->entry_point,
7021 vtn_handle_execution_mode_id, NULL);
7022
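   /* A constant decorated with the WorkgroupSize built-in takes precedence
    * over the workgroup size from the LocalSize/LocalSizeId execution modes.
    */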
7023 if (b->workgroup_size_builtin) {
7024 vtn_assert(gl_shader_stage_uses_workgroup(stage));
7025 vtn_assert(b->workgroup_size_builtin->type->type ==
7026 glsl_vector_type(GLSL_TYPE_UINT, 3));
7027
7028 nir_const_value *const_size =
7029 b->workgroup_size_builtin->constant->values;
7030
7031 b->shader->info.workgroup_size[0] = const_size[0].u32;
7032 b->shader->info.workgroup_size[1] = const_size[1].u32;
7033 b->shader->info.workgroup_size[2] = const_size[2].u32;
7034 }
7035
7036 /* Set types on all vtn_values */
7037 vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);
7038
7039 vtn_build_cfg(b, words, word_end);
7040
7041 if (!options->create_library) {
7042 assert(b->entry_point->value_type == vtn_value_type_function);
7043 b->entry_point->func->referenced = true;
7044 }
7045
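   /* Emitting a function can mark the functions it calls as referenced, so
    * keep iterating until no new functions get emitted.
    */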
7046 bool progress;
7047 do {
7048 progress = false;
7049 vtn_foreach_function(func, &b->functions) {
7050 if ((options->create_library || func->referenced) && !func->emitted) {
7051 vtn_function_emit(b, func, vtn_handle_body_instruction);
7052 progress = true;
7053 }
7054 }
7055 } while (progress);
7056
7057 if (!options->create_library) {
7058 vtn_assert(b->entry_point->value_type == vtn_value_type_function);
7059 nir_function *entry_point = b->entry_point->func->nir_func;
7060 vtn_assert(entry_point);
7061
7062 entry_point->dont_inline = false;
7063 /* post process entry_points with input params */
7064 if (entry_point->num_params && b->shader->info.stage == MESA_SHADER_KERNEL)
7065 entry_point = vtn_emit_kernel_entry_point_wrapper(b, entry_point);
7066
7067 entry_point->is_entrypoint = true;
7068 }
7069
7070 /* structurize the CFG */
7071 nir_lower_goto_ifs(b->shader);
7072
7073 nir_validate_shader(b->shader, "after spirv cfg");
7074
7075 nir_lower_continue_constructs(b->shader);
7076
7077 /* A SPIR-V module can have multiple shaders stages and also multiple
7078 * shaders of the same stage. Global variables are declared per-module.
7079 *
7080 * Starting in SPIR-V 1.4 the list of global variables is part of
7081 * OpEntryPoint, so only valid ones will be created. Previous versions
7082 * only have Input and Output variables listed, so remove dead variables to
7083 * clean up the remaining ones.
7084 */
7085 if (!options->create_library && b->version < 0x10400) {
7086 const nir_remove_dead_variables_options dead_opts = {
7087 .can_remove_var = can_remove,
7088 .can_remove_var_data = b->vars_used_indirectly,
7089 };
7090 nir_remove_dead_variables(b->shader, ~(nir_var_function_temp |
7091 nir_var_shader_out |
7092 nir_var_shader_in |
7093 nir_var_system_value),
7094 b->vars_used_indirectly ? &dead_opts : NULL);
7095 }
7096
7097 nir_foreach_variable_in_shader(var, b->shader) {
7098 switch (var->data.mode) {
7099 case nir_var_mem_ubo:
7100 b->shader->info.num_ubos++;
7101 break;
7102 case nir_var_mem_ssbo:
7103 b->shader->info.num_ssbos++;
7104 break;
7105 case nir_var_mem_push_const:
7106 vtn_assert(b->shader->num_uniforms == 0);
7107 b->shader->num_uniforms =
7108 glsl_get_explicit_size(glsl_without_array(var->type), false);
7109 break;
7110 }
7111 }
7112
7113 /* We sometimes generate bogus derefs that, while never used, give the
7114 * validator a bit of heartburn. Run dead code to get rid of them.
7115 */
7116 nir_opt_dce(b->shader);
7117
7118 /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
7119 * a Block, all of them will be and Blocks are explicitly laid out.
7120 */
7121 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
7122 if (glsl_type_is_interface(var->type)) {
7123 assert(b->options->caps.workgroup_memory_explicit_layout);
7124 b->shader->info.shared_memory_explicit_layout = true;
7125 break;
7126 }
7127 }
7128 if (b->shader->info.shared_memory_explicit_layout) {
7129 unsigned size = 0;
7130 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
7131 assert(glsl_type_is_interface(var->type));
7132 const bool align_to_stride = false;
7133 size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
7134 }
7135 b->shader->info.shared_size = size;
7136 }
7137
7138 if (stage == MESA_SHADER_FRAGMENT) {
7139 /* From the Vulkan 1.2.199 spec:
7140 *
7141 * "If a fragment shader entry point’s interface includes an input
7142 * variable decorated with SamplePosition, Sample Shading is
7143 * considered enabled with a minSampleShading value of 1.0."
7144 *
7145 * Similar text exists for SampleId. Regarding the Sample decoration,
7146 * the Vulkan 1.2.199 spec says:
7147 *
7148 * "If a fragment shader input is decorated with Sample, a separate
7149 * value must be assigned to that variable for each covered sample in
7150 * the fragment, and that value must be sampled at the location of
7151 * the individual sample. When rasterizationSamples is
7152 * VK_SAMPLE_COUNT_1_BIT, the fragment center must be used for
7153 * Centroid, Sample, and undecorated attribute interpolation."
7154 *
7155 * Unfortunately, this isn't quite as clear about static use and the
7156 * interface but the static use check should be valid.
7157 *
7158 * For OpenGL, similar language exists but it's all more wishy-washy.
7159 * We'll assume the same behavior across APIs.
7160 */
7161 nir_foreach_variable_with_modes(var, b->shader,
7162 nir_var_shader_in |
7163 nir_var_system_value) {
7164 struct nir_variable_data *members =
7165 var->members ? var->members : &var->data;
7166 uint16_t num_members = var->members ? var->num_members : 1;
7167 for (uint16_t i = 0; i < num_members; i++) {
7168 if (members[i].mode == nir_var_system_value &&
7169 (members[i].location == SYSTEM_VALUE_SAMPLE_ID ||
7170 members[i].location == SYSTEM_VALUE_SAMPLE_POS))
7171 b->shader->info.fs.uses_sample_shading = true;
7172
7173 if (members[i].mode == nir_var_shader_in && members[i].sample)
7174 b->shader->info.fs.uses_sample_shading = true;
7175 }
7176 }
7177 }
7178
7179 /* Work around applications that declare shader_call_data variables inside
7180 * ray generation shaders.
7181 *
7182 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/5326
7183 */
7184 if (stage == MESA_SHADER_RAYGEN)
7185 NIR_PASS(_, b->shader, nir_remove_dead_variables, nir_var_shader_call_data,
7186 NULL);
7187
7188 /* Unparent the shader from the vtn_builder before we delete the builder */
7189 ralloc_steal(NULL, b->shader);
7190
7191 nir_shader *shader = b->shader;
7192 ralloc_free(b);
7193
7194 return shader;
7195 }
7196
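/* Emit a static inline nir_builder helper for one exported function: it looks
 * up the function by name (creating a prototype if needed), builds a nir_call
 * with the given arguments, and loads any return value back through a local
 * "return" variable deref.
 */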
7197 static bool
7198 func_to_nir_builder(FILE *fp, struct vtn_function *func)
7199 {
7200 nir_function *nir_func = func->nir_func;
7201 struct vtn_type *return_type = func->type->return_type;
7202 bool returns = return_type->base_type != vtn_base_type_void;
7203
7204 if (returns && return_type->base_type != vtn_base_type_scalar &&
7205 return_type->base_type != vtn_base_type_vector) {
7206 fprintf(stderr, "Unsupported return type for %s", nir_func->name);
7207 return false;
7208 }
7209
7210 /* If there is a return type, the first NIR parameter is the return deref,
7211 * so offset by that for logical parameter iteration.
7212 */
7213 unsigned first_param = returns ? 1 : 0;
7214
7215 /* Generate function signature */
7216 fprintf(fp, "static inline %s\n", returns ? "nir_def *": "void");
7217 fprintf(fp, "%s(nir_builder *b", nir_func->name);
7218
7219 /* TODO: Can we recover parameter names? */
7220 for (unsigned i = first_param; i < nir_func->num_params; ++i) {
7221 fprintf(fp, ", nir_def *arg%u", i);
7222 }
7223
7224 fprintf(fp, ")\n{\n");
7225
7226 /* Validate inputs. nir_validate will do this too, but the
7227 * errors/backtraces from these asserts should be nicer.
7228 */
7229 for (unsigned i = first_param; i < nir_func->num_params; ++i) {
7230 nir_parameter *param = &nir_func->params[i];
7231 fprintf(fp, " assert(arg%u->bit_size == %u);\n", i, param->bit_size);
7232 fprintf(fp, " assert(arg%u->num_components == %u);\n", i,
7233 param->num_components);
7234 fprintf(fp, "\n");
7235 }
7236
7237 /* Find the function to call. If not found, create a prototype */
7238 fprintf(fp, " nir_function *func = nir_shader_get_function_for_name(b->shader, \"%s\");\n",
7239 nir_func->name);
7240 fprintf(fp, "\n");
7241 fprintf(fp, " if (!func) {\n");
7242 fprintf(fp, " func = nir_function_create(b->shader, \"%s\");\n",
7243 nir_func->name);
7244 fprintf(fp, " func->num_params = %u;\n", nir_func->num_params);
7245 fprintf(fp, " func->params = ralloc_array(b->shader, nir_parameter, func->num_params);\n");
7246
7247 for (unsigned i = 0; i < nir_func->num_params; ++i) {
7248 fprintf(fp, "\n");
7249 fprintf(fp, " func->params[%u].bit_size = %u;\n", i,
7250 nir_func->params[i].bit_size);
7251 fprintf(fp, " func->params[%u].num_components = %u;\n", i,
7252 nir_func->params[i].num_components);
7253 }
7254
7255 fprintf(fp, " }\n\n");
7256
7257
7258 if (returns) {
7259 /* We assume that vec3 variables are lowered to vec4. Mirror that here so
7260 * we don't need to lower vec3 to vec4 again at link-time.
7261 */
7262 assert(glsl_type_is_vector_or_scalar(return_type->type));
7263 unsigned elements = return_type->type->vector_elements;
7264 if (elements == 3)
7265 elements = 4;
7266
7267 /* Reconstruct the return type. */
7268 fprintf(fp, " const struct glsl_type *ret_type = glsl_vector_type(%u, %u);\n",
7269 return_type->type->base_type, elements);
7270
7271 /* With the type, we can make a variable and get a deref to pass in */
7272 fprintf(fp, " nir_variable *ret = nir_local_variable_create(b->impl, ret_type, \"return\");\n");
7273 fprintf(fp, " nir_deref_instr *deref = nir_build_deref_var(b, ret);\n");
7274
7275 /* XXX: This is a hack due to ptr size differing between KERNEL and other
7276 * shader stages. This needs to be fixed in core NIR.
7277 */
7278 fprintf(fp, " deref->def.bit_size = %u;\n", nir_func->params[0].bit_size);
7279 fprintf(fp, "\n");
7280 }
7281
7282 /* Call the function */
7283 fprintf(fp, " nir_call(b, func");
7284
7285 if (returns)
7286 fprintf(fp, ", &deref->def");
7287
7288 for (unsigned i = first_param; i < nir_func->num_params; ++i)
7289 fprintf(fp, ", arg%u", i);
7290
7291 fprintf(fp, ");\n");
7292
7293 /* Load the return value if any, undoing the vec3->vec4 lowering. */
7294 if (returns) {
7295 fprintf(fp, "\n");
7296
7297 if (return_type->type->vector_elements == 3)
7298 fprintf(fp, " return nir_trim_vector(b, nir_load_deref(b, deref), 3);\n");
7299 else
7300 fprintf(fp, " return nir_load_deref(b, deref);\n");
7301 }
7302
7303 fprintf(fp, "}\n\n");
7304 return true;
7305 }
7306
7307 bool
7308 spirv_library_to_nir_builder(FILE *fp, const uint32_t *words, size_t word_count,
7309 const struct spirv_to_nir_options *options)
7310 {
7311 #ifndef NDEBUG
7312 static once_flag initialized_debug_flag = ONCE_FLAG_INIT;
7313 call_once(&initialized_debug_flag, initialize_mesa_spirv_debug);
7314 #endif
7315
7316 const uint32_t *word_end = words + word_count;
7317
7318 struct vtn_builder *b = vtn_create_builder(words, word_count,
7319 MESA_SHADER_KERNEL, "placeholder name",
7320 options);
7321
7322 if (b == NULL)
7323 return false;
7324
7325 /* See also _vtn_fail() */
7326 if (vtn_setjmp(b->fail_jump)) {
7327 ralloc_free(b);
7328 return false;
7329 }
7330
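   /* This NIR shader only acts as a container for the parsed functions while
    * the builder header is generated, so empty compiler options suffice.
    */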
7331 b->shader = nir_shader_create(b, MESA_SHADER_KERNEL,
7332 &(const nir_shader_compiler_options){0}, NULL);
7333
7334 /* Skip the SPIR-V header, handled at vtn_create_builder */
7335 words += 5;
7336
7337 /* Handle all the preamble instructions */
7338 words = vtn_foreach_instruction(b, words, word_end,
7339 vtn_handle_preamble_instruction);
7340
7341 /* Handle all variable, type, and constant instructions */
7342 words = vtn_foreach_instruction(b, words, word_end,
7343 vtn_handle_variable_or_type_instruction);
7344
7345 /* Set types on all vtn_values */
7346 vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);
7347
7348 vtn_build_cfg(b, words, word_end);
7349
7350 fprintf(fp, "#include \"compiler/nir/nir_builder.h\"\n\n");
7351
7352 vtn_foreach_function(func, &b->functions) {
7353 if (func->linkage != SpvLinkageTypeExport)
7354 continue;
7355
7356 if (!func_to_nir_builder(fp, func))
7357 return false;
7358 }
7359
7360 ralloc_free(b);
7361 return true;
7362 }
7363