1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jason Ekstrand (jason@jlekstrand.net)
25 *
26 */
27
28 #include "vtn_private.h"
29 #include "nir/nir_vla.h"
30 #include "nir/nir_control_flow.h"
31 #include "nir/nir_constant_expressions.h"
32 #include "nir/nir_deref.h"
33 #include "spirv_info.h"
34
35 #include "util/format/u_format.h"
36 #include "util/u_math.h"
37 #include "util/u_string.h"
38
39 #include <stdio.h>
40
41 #ifndef NDEBUG
42 static enum nir_spirv_debug_level
vtn_default_log_level(void)43 vtn_default_log_level(void)
44 {
45 enum nir_spirv_debug_level level = NIR_SPIRV_DEBUG_LEVEL_WARNING;
46 const char *vtn_log_level_strings[] = {
47 [NIR_SPIRV_DEBUG_LEVEL_WARNING] = "warning",
48 [NIR_SPIRV_DEBUG_LEVEL_INFO] = "info",
49 [NIR_SPIRV_DEBUG_LEVEL_ERROR] = "error",
50 };
51 const char *str = getenv("MESA_SPIRV_LOG_LEVEL");
52
53 if (str == NULL)
54 return NIR_SPIRV_DEBUG_LEVEL_WARNING;
55
56 for (int i = 0; i < ARRAY_SIZE(vtn_log_level_strings); i++) {
57 if (strcasecmp(str, vtn_log_level_strings[i]) == 0) {
58 level = i;
59 break;
60 }
61 }
62
63 return level;
64 }
65 #endif
66
67 void
vtn_log(struct vtn_builder * b,enum nir_spirv_debug_level level,size_t spirv_offset,const char * message)68 vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level,
69 size_t spirv_offset, const char *message)
70 {
71 if (b->options->debug.func) {
72 b->options->debug.func(b->options->debug.private_data,
73 level, spirv_offset, message);
74 }
75
76 #ifndef NDEBUG
77 static enum nir_spirv_debug_level default_level =
78 NIR_SPIRV_DEBUG_LEVEL_INVALID;
79
80 if (default_level == NIR_SPIRV_DEBUG_LEVEL_INVALID)
81 default_level = vtn_default_log_level();
82
83 if (level >= default_level)
84 fprintf(stderr, "%s\n", message);
85 #endif
86 }
87
88 void
vtn_logf(struct vtn_builder * b,enum nir_spirv_debug_level level,size_t spirv_offset,const char * fmt,...)89 vtn_logf(struct vtn_builder *b, enum nir_spirv_debug_level level,
90 size_t spirv_offset, const char *fmt, ...)
91 {
92 va_list args;
93 char *msg;
94
95 va_start(args, fmt);
96 msg = ralloc_vasprintf(NULL, fmt, args);
97 va_end(args);
98
99 vtn_log(b, level, spirv_offset, msg);
100
101 ralloc_free(msg);
102 }
103
104 static void
vtn_log_err(struct vtn_builder * b,enum nir_spirv_debug_level level,const char * prefix,const char * file,unsigned line,const char * fmt,va_list args)105 vtn_log_err(struct vtn_builder *b,
106 enum nir_spirv_debug_level level, const char *prefix,
107 const char *file, unsigned line,
108 const char *fmt, va_list args)
109 {
110 char *msg;
111
112 msg = ralloc_strdup(NULL, prefix);
113
114 #ifndef NDEBUG
115 ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line);
116 #endif
117
118 ralloc_asprintf_append(&msg, " ");
119
120 ralloc_vasprintf_append(&msg, fmt, args);
121
122 ralloc_asprintf_append(&msg, "\n %zu bytes into the SPIR-V binary",
123 b->spirv_offset);
124
125 if (b->file) {
126 ralloc_asprintf_append(&msg,
127 "\n in SPIR-V source file %s, line %d, col %d",
128 b->file, b->line, b->col);
129 }
130
131 vtn_log(b, level, b->spirv_offset, msg);
132
133 ralloc_free(msg);
134 }
135
136 static void
vtn_dump_shader(struct vtn_builder * b,const char * path,const char * prefix)137 vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
138 {
139 static int idx = 0;
140
141 char filename[1024];
142 int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv",
143 path, prefix, idx++);
144 if (len < 0 || len >= sizeof(filename))
145 return;
146
147 FILE *f = fopen(filename, "w");
148 if (f == NULL)
149 return;
150
151 fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f);
152 fclose(f);
153
154 vtn_info("SPIR-V shader dumped to %s", filename);
155 }
156
157 void
_vtn_warn(struct vtn_builder * b,const char * file,unsigned line,const char * fmt,...)158 _vtn_warn(struct vtn_builder *b, const char *file, unsigned line,
159 const char *fmt, ...)
160 {
161 va_list args;
162
163 va_start(args, fmt);
164 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_WARNING, "SPIR-V WARNING:\n",
165 file, line, fmt, args);
166 va_end(args);
167 }
168
169 void
_vtn_err(struct vtn_builder * b,const char * file,unsigned line,const char * fmt,...)170 _vtn_err(struct vtn_builder *b, const char *file, unsigned line,
171 const char *fmt, ...)
172 {
173 va_list args;
174
175 va_start(args, fmt);
176 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V ERROR:\n",
177 file, line, fmt, args);
178 va_end(args);
179 }
180
181 void
_vtn_fail(struct vtn_builder * b,const char * file,unsigned line,const char * fmt,...)182 _vtn_fail(struct vtn_builder *b, const char *file, unsigned line,
183 const char *fmt, ...)
184 {
185 va_list args;
186
187 va_start(args, fmt);
188 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V parsing FAILED:\n",
189 file, line, fmt, args);
190 va_end(args);
191
192 const char *dump_path = getenv("MESA_SPIRV_FAIL_DUMP_PATH");
193 if (dump_path)
194 vtn_dump_shader(b, dump_path, "fail");
195
196 vtn_longjmp(b->fail_jump, 1);
197 }
198
199 static struct vtn_ssa_value *
vtn_undef_ssa_value(struct vtn_builder * b,const struct glsl_type * type)200 vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
201 {
202 struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
203 val->type = glsl_get_bare_type(type);
204
205 if (glsl_type_is_vector_or_scalar(type)) {
206 unsigned num_components = glsl_get_vector_elements(val->type);
207 unsigned bit_size = glsl_get_bit_size(val->type);
208 val->def = nir_ssa_undef(&b->nb, num_components, bit_size);
209 } else {
210 unsigned elems = glsl_get_length(val->type);
211 val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
212 if (glsl_type_is_array_or_matrix(type)) {
213 const struct glsl_type *elem_type = glsl_get_array_element(type);
214 for (unsigned i = 0; i < elems; i++)
215 val->elems[i] = vtn_undef_ssa_value(b, elem_type);
216 } else {
217 vtn_assert(glsl_type_is_struct_or_ifc(type));
218 for (unsigned i = 0; i < elems; i++) {
219 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
220 val->elems[i] = vtn_undef_ssa_value(b, elem_type);
221 }
222 }
223 }
224
225 return val;
226 }
227
228 struct vtn_ssa_value *
vtn_const_ssa_value(struct vtn_builder * b,nir_constant * constant,const struct glsl_type * type)229 vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
230 const struct glsl_type *type)
231 {
232 struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant);
233
234 if (entry)
235 return entry->data;
236
237 struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
238 val->type = glsl_get_bare_type(type);
239
240 if (glsl_type_is_vector_or_scalar(type)) {
241 unsigned num_components = glsl_get_vector_elements(val->type);
242 unsigned bit_size = glsl_get_bit_size(type);
243 nir_load_const_instr *load =
244 nir_load_const_instr_create(b->shader, num_components, bit_size);
245
246 memcpy(load->value, constant->values,
247 sizeof(nir_const_value) * num_components);
248
249 nir_instr_insert_before_cf_list(&b->nb.impl->body, &load->instr);
250 val->def = &load->def;
251 } else {
252 unsigned elems = glsl_get_length(val->type);
253 val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
254 if (glsl_type_is_array_or_matrix(type)) {
255 const struct glsl_type *elem_type = glsl_get_array_element(type);
256 for (unsigned i = 0; i < elems; i++) {
257 val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
258 elem_type);
259 }
260 } else {
261 vtn_assert(glsl_type_is_struct_or_ifc(type));
262 for (unsigned i = 0; i < elems; i++) {
263 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
264 val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
265 elem_type);
266 }
267 }
268 }
269
270 return val;
271 }
272
273 struct vtn_ssa_value *
vtn_ssa_value(struct vtn_builder * b,uint32_t value_id)274 vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
275 {
276 struct vtn_value *val = vtn_untyped_value(b, value_id);
277 switch (val->value_type) {
278 case vtn_value_type_undef:
279 return vtn_undef_ssa_value(b, val->type->type);
280
281 case vtn_value_type_constant:
282 return vtn_const_ssa_value(b, val->constant, val->type->type);
283
284 case vtn_value_type_ssa:
285 return val->ssa;
286
287 case vtn_value_type_pointer:
288 vtn_assert(val->pointer->ptr_type && val->pointer->ptr_type->type);
289 struct vtn_ssa_value *ssa =
290 vtn_create_ssa_value(b, val->pointer->ptr_type->type);
291 ssa->def = vtn_pointer_to_ssa(b, val->pointer);
292 return ssa;
293
294 default:
295 vtn_fail("Invalid type for an SSA value");
296 }
297 }
298
299 struct vtn_value *
vtn_push_ssa_value(struct vtn_builder * b,uint32_t value_id,struct vtn_ssa_value * ssa)300 vtn_push_ssa_value(struct vtn_builder *b, uint32_t value_id,
301 struct vtn_ssa_value *ssa)
302 {
303 struct vtn_type *type = vtn_get_value_type(b, value_id);
304
305 /* See vtn_create_ssa_value */
306 vtn_fail_if(ssa->type != glsl_get_bare_type(type->type),
307 "Type mismatch for SPIR-V SSA value");
308
309 struct vtn_value *val;
310 if (type->base_type == vtn_base_type_pointer) {
311 val = vtn_push_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type));
312 } else {
313 /* Don't trip the value_type_ssa check in vtn_push_value */
314 val = vtn_push_value(b, value_id, vtn_value_type_invalid);
315 val->value_type = vtn_value_type_ssa;
316 val->ssa = ssa;
317 }
318
319 return val;
320 }
321
322 nir_ssa_def *
vtn_get_nir_ssa(struct vtn_builder * b,uint32_t value_id)323 vtn_get_nir_ssa(struct vtn_builder *b, uint32_t value_id)
324 {
325 struct vtn_ssa_value *ssa = vtn_ssa_value(b, value_id);
326 vtn_fail_if(!glsl_type_is_vector_or_scalar(ssa->type),
327 "Expected a vector or scalar type");
328 return ssa->def;
329 }
330
331 struct vtn_value *
vtn_push_nir_ssa(struct vtn_builder * b,uint32_t value_id,nir_ssa_def * def)332 vtn_push_nir_ssa(struct vtn_builder *b, uint32_t value_id, nir_ssa_def *def)
333 {
334 /* Types for all SPIR-V SSA values are set as part of a pre-pass so the
335 * type will be valid by the time we get here.
336 */
337 struct vtn_type *type = vtn_get_value_type(b, value_id);
338 vtn_fail_if(def->num_components != glsl_get_vector_elements(type->type) ||
339 def->bit_size != glsl_get_bit_size(type->type),
340 "Mismatch between NIR and SPIR-V type.");
341 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
342 ssa->def = def;
343 return vtn_push_ssa_value(b, value_id, ssa);
344 }
345
346 static enum gl_access_qualifier
spirv_to_gl_access_qualifier(struct vtn_builder * b,SpvAccessQualifier access_qualifier)347 spirv_to_gl_access_qualifier(struct vtn_builder *b,
348 SpvAccessQualifier access_qualifier)
349 {
350 switch (access_qualifier) {
351 case SpvAccessQualifierReadOnly:
352 return ACCESS_NON_WRITEABLE;
353 case SpvAccessQualifierWriteOnly:
354 return ACCESS_NON_READABLE;
355 case SpvAccessQualifierReadWrite:
356 return 0;
357 default:
358 vtn_fail("Invalid image access qualifier");
359 }
360 }
361
362 static nir_deref_instr *
vtn_get_image(struct vtn_builder * b,uint32_t value_id,enum gl_access_qualifier * access)363 vtn_get_image(struct vtn_builder *b, uint32_t value_id,
364 enum gl_access_qualifier *access)
365 {
366 struct vtn_type *type = vtn_get_value_type(b, value_id);
367 vtn_assert(type->base_type == vtn_base_type_image);
368 if (access)
369 *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier);
370 return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
371 nir_var_uniform, type->glsl_image, 0);
372 }
373
374 static void
vtn_push_image(struct vtn_builder * b,uint32_t value_id,nir_deref_instr * deref,bool propagate_non_uniform)375 vtn_push_image(struct vtn_builder *b, uint32_t value_id,
376 nir_deref_instr *deref, bool propagate_non_uniform)
377 {
378 struct vtn_type *type = vtn_get_value_type(b, value_id);
379 vtn_assert(type->base_type == vtn_base_type_image);
380 struct vtn_value *value = vtn_push_nir_ssa(b, value_id, &deref->dest.ssa);
381 value->propagated_non_uniform = propagate_non_uniform;
382 }
383
384 static nir_deref_instr *
vtn_get_sampler(struct vtn_builder * b,uint32_t value_id)385 vtn_get_sampler(struct vtn_builder *b, uint32_t value_id)
386 {
387 struct vtn_type *type = vtn_get_value_type(b, value_id);
388 vtn_assert(type->base_type == vtn_base_type_sampler);
389 return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
390 nir_var_uniform, glsl_bare_sampler_type(), 0);
391 }
392
393 nir_ssa_def *
vtn_sampled_image_to_nir_ssa(struct vtn_builder * b,struct vtn_sampled_image si)394 vtn_sampled_image_to_nir_ssa(struct vtn_builder *b,
395 struct vtn_sampled_image si)
396 {
397 return nir_vec2(&b->nb, &si.image->dest.ssa, &si.sampler->dest.ssa);
398 }
399
400 static void
vtn_push_sampled_image(struct vtn_builder * b,uint32_t value_id,struct vtn_sampled_image si,bool propagate_non_uniform)401 vtn_push_sampled_image(struct vtn_builder *b, uint32_t value_id,
402 struct vtn_sampled_image si, bool propagate_non_uniform)
403 {
404 struct vtn_type *type = vtn_get_value_type(b, value_id);
405 vtn_assert(type->base_type == vtn_base_type_sampled_image);
406 struct vtn_value *value = vtn_push_nir_ssa(b, value_id,
407 vtn_sampled_image_to_nir_ssa(b, si));
408 value->propagated_non_uniform = propagate_non_uniform;
409 }
410
411 static struct vtn_sampled_image
vtn_get_sampled_image(struct vtn_builder * b,uint32_t value_id)412 vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id)
413 {
414 struct vtn_type *type = vtn_get_value_type(b, value_id);
415 vtn_assert(type->base_type == vtn_base_type_sampled_image);
416 nir_ssa_def *si_vec2 = vtn_get_nir_ssa(b, value_id);
417
418 struct vtn_sampled_image si = { NULL, };
419 si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0),
420 nir_var_uniform,
421 type->image->glsl_image, 0);
422 si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1),
423 nir_var_uniform,
424 glsl_bare_sampler_type(), 0);
425 return si;
426 }
427
428 static const char *
vtn_string_literal(struct vtn_builder * b,const uint32_t * words,unsigned word_count,unsigned * words_used)429 vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
430 unsigned word_count, unsigned *words_used)
431 {
432 /* From the SPIR-V spec:
433 *
434 * "A string is interpreted as a nul-terminated stream of characters.
435 * The character set is Unicode in the UTF-8 encoding scheme. The UTF-8
436 * octets (8-bit bytes) are packed four per word, following the
437 * little-endian convention (i.e., the first octet is in the
438 * lowest-order 8 bits of the word). The final word contains the
439 * string’s nul-termination character (0), and all contents past the
440 * end of the string in the final word are padded with 0."
441 *
442 * On big-endian, we need to byte-swap.
443 */
444 #if UTIL_ARCH_BIG_ENDIAN
445 {
446 uint32_t *copy = ralloc_array(b, uint32_t, word_count);
447 for (unsigned i = 0; i < word_count; i++)
448 copy[i] = util_bswap32(words[i]);
449 words = copy;
450 }
451 #endif
452
453 const char *str = (char *)words;
454 const char *end = memchr(str, 0, word_count * 4);
455 vtn_fail_if(end == NULL, "String is not null-terminated");
456
457 if (words_used)
458 *words_used = DIV_ROUND_UP(end - str + 1, sizeof(*words));
459
460 return str;
461 }
462
463 const uint32_t *
vtn_foreach_instruction(struct vtn_builder * b,const uint32_t * start,const uint32_t * end,vtn_instruction_handler handler)464 vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
465 const uint32_t *end, vtn_instruction_handler handler)
466 {
467 b->file = NULL;
468 b->line = -1;
469 b->col = -1;
470
471 const uint32_t *w = start;
472 while (w < end) {
473 SpvOp opcode = w[0] & SpvOpCodeMask;
474 unsigned count = w[0] >> SpvWordCountShift;
475 vtn_assert(count >= 1 && w + count <= end);
476
477 b->spirv_offset = (uint8_t *)w - (uint8_t *)b->spirv;
478
479 switch (opcode) {
480 case SpvOpNop:
481 break; /* Do nothing */
482
483 case SpvOpLine:
484 b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
485 b->line = w[2];
486 b->col = w[3];
487 break;
488
489 case SpvOpNoLine:
490 b->file = NULL;
491 b->line = -1;
492 b->col = -1;
493 break;
494
495 default:
496 if (!handler(b, opcode, w, count))
497 return w;
498 break;
499 }
500
501 w += count;
502 }
503
504 b->spirv_offset = 0;
505 b->file = NULL;
506 b->line = -1;
507 b->col = -1;
508
509 assert(w == end);
510 return w;
511 }
512
513 static bool
vtn_handle_non_semantic_instruction(struct vtn_builder * b,SpvOp ext_opcode,const uint32_t * w,unsigned count)514 vtn_handle_non_semantic_instruction(struct vtn_builder *b, SpvOp ext_opcode,
515 const uint32_t *w, unsigned count)
516 {
517 /* Do nothing. */
518 return true;
519 }
520
521 static void
vtn_handle_extension(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)522 vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
523 const uint32_t *w, unsigned count)
524 {
525 switch (opcode) {
526 case SpvOpExtInstImport: {
527 struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
528 const char *ext = vtn_string_literal(b, &w[2], count - 2, NULL);
529 if (strcmp(ext, "GLSL.std.450") == 0) {
530 val->ext_handler = vtn_handle_glsl450_instruction;
531 } else if ((strcmp(ext, "SPV_AMD_gcn_shader") == 0)
532 && (b->options && b->options->caps.amd_gcn_shader)) {
533 val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
534 } else if ((strcmp(ext, "SPV_AMD_shader_ballot") == 0)
535 && (b->options && b->options->caps.amd_shader_ballot)) {
536 val->ext_handler = vtn_handle_amd_shader_ballot_instruction;
537 } else if ((strcmp(ext, "SPV_AMD_shader_trinary_minmax") == 0)
538 && (b->options && b->options->caps.amd_trinary_minmax)) {
539 val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
540 } else if ((strcmp(ext, "SPV_AMD_shader_explicit_vertex_parameter") == 0)
541 && (b->options && b->options->caps.amd_shader_explicit_vertex_parameter)) {
542 val->ext_handler = vtn_handle_amd_shader_explicit_vertex_parameter_instruction;
543 } else if (strcmp(ext, "OpenCL.std") == 0) {
544 val->ext_handler = vtn_handle_opencl_instruction;
545 } else if (strstr(ext, "NonSemantic.") == ext) {
546 val->ext_handler = vtn_handle_non_semantic_instruction;
547 } else {
548 vtn_fail("Unsupported extension: %s", ext);
549 }
550 break;
551 }
552
553 case SpvOpExtInst: {
554 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
555 bool handled = val->ext_handler(b, w[4], w, count);
556 vtn_assert(handled);
557 break;
558 }
559
560 default:
561 vtn_fail_with_opcode("Unhandled opcode", opcode);
562 }
563 }
564
565 static void
_foreach_decoration_helper(struct vtn_builder * b,struct vtn_value * base_value,int parent_member,struct vtn_value * value,vtn_decoration_foreach_cb cb,void * data)566 _foreach_decoration_helper(struct vtn_builder *b,
567 struct vtn_value *base_value,
568 int parent_member,
569 struct vtn_value *value,
570 vtn_decoration_foreach_cb cb, void *data)
571 {
572 for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
573 int member;
574 if (dec->scope == VTN_DEC_DECORATION) {
575 member = parent_member;
576 } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
577 vtn_fail_if(value->value_type != vtn_value_type_type ||
578 value->type->base_type != vtn_base_type_struct,
579 "OpMemberDecorate and OpGroupMemberDecorate are only "
580 "allowed on OpTypeStruct");
581 /* This means we haven't recursed yet */
582 assert(value == base_value);
583
584 member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
585
586 vtn_fail_if(member >= base_value->type->length,
587 "OpMemberDecorate specifies member %d but the "
588 "OpTypeStruct has only %u members",
589 member, base_value->type->length);
590 } else {
591 /* Not a decoration */
592 assert(dec->scope == VTN_DEC_EXECUTION_MODE);
593 continue;
594 }
595
596 if (dec->group) {
597 assert(dec->group->value_type == vtn_value_type_decoration_group);
598 _foreach_decoration_helper(b, base_value, member, dec->group,
599 cb, data);
600 } else {
601 cb(b, base_value, member, dec, data);
602 }
603 }
604 }
605
606 /** Iterates (recursively if needed) over all of the decorations on a value
607 *
608 * This function iterates over all of the decorations applied to a given
609 * value. If it encounters a decoration group, it recurses into the group
610 * and iterates over all of those decorations as well.
611 */
612 void
vtn_foreach_decoration(struct vtn_builder * b,struct vtn_value * value,vtn_decoration_foreach_cb cb,void * data)613 vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
614 vtn_decoration_foreach_cb cb, void *data)
615 {
616 _foreach_decoration_helper(b, value, -1, value, cb, data);
617 }
618
619 void
vtn_foreach_execution_mode(struct vtn_builder * b,struct vtn_value * value,vtn_execution_mode_foreach_cb cb,void * data)620 vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
621 vtn_execution_mode_foreach_cb cb, void *data)
622 {
623 for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
624 if (dec->scope != VTN_DEC_EXECUTION_MODE)
625 continue;
626
627 assert(dec->group == NULL);
628 cb(b, value, dec, data);
629 }
630 }
631
632 void
vtn_handle_decoration(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)633 vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
634 const uint32_t *w, unsigned count)
635 {
636 const uint32_t *w_end = w + count;
637 const uint32_t target = w[1];
638 w += 2;
639
640 switch (opcode) {
641 case SpvOpDecorationGroup:
642 vtn_push_value(b, target, vtn_value_type_decoration_group);
643 break;
644
645 case SpvOpDecorate:
646 case SpvOpDecorateId:
647 case SpvOpMemberDecorate:
648 case SpvOpDecorateString:
649 case SpvOpMemberDecorateString:
650 case SpvOpExecutionMode:
651 case SpvOpExecutionModeId: {
652 struct vtn_value *val = vtn_untyped_value(b, target);
653
654 struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
655 switch (opcode) {
656 case SpvOpDecorate:
657 case SpvOpDecorateId:
658 case SpvOpDecorateString:
659 dec->scope = VTN_DEC_DECORATION;
660 break;
661 case SpvOpMemberDecorate:
662 case SpvOpMemberDecorateString:
663 dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
664 vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */
665 "Member argument of OpMemberDecorate too large");
666 break;
667 case SpvOpExecutionMode:
668 case SpvOpExecutionModeId:
669 dec->scope = VTN_DEC_EXECUTION_MODE;
670 break;
671 default:
672 unreachable("Invalid decoration opcode");
673 }
674 dec->decoration = *(w++);
675 dec->operands = w;
676
677 /* Link into the list */
678 dec->next = val->decoration;
679 val->decoration = dec;
680 break;
681 }
682
683 case SpvOpGroupMemberDecorate:
684 case SpvOpGroupDecorate: {
685 struct vtn_value *group =
686 vtn_value(b, target, vtn_value_type_decoration_group);
687
688 for (; w < w_end; w++) {
689 struct vtn_value *val = vtn_untyped_value(b, *w);
690 struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
691
692 dec->group = group;
693 if (opcode == SpvOpGroupDecorate) {
694 dec->scope = VTN_DEC_DECORATION;
695 } else {
696 dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
697 vtn_fail_if(dec->scope < 0, /* Check for overflow */
698 "Member argument of OpGroupMemberDecorate too large");
699 }
700
701 /* Link into the list */
702 dec->next = val->decoration;
703 val->decoration = dec;
704 }
705 break;
706 }
707
708 default:
709 unreachable("Unhandled opcode");
710 }
711 }
712
713 struct member_decoration_ctx {
714 unsigned num_fields;
715 struct glsl_struct_field *fields;
716 struct vtn_type *type;
717 };
718
719 /**
720 * Returns true if the given type contains a struct decorated Block or
721 * BufferBlock
722 */
723 bool
vtn_type_contains_block(struct vtn_builder * b,struct vtn_type * type)724 vtn_type_contains_block(struct vtn_builder *b, struct vtn_type *type)
725 {
726 switch (type->base_type) {
727 case vtn_base_type_array:
728 return vtn_type_contains_block(b, type->array_element);
729 case vtn_base_type_struct:
730 if (type->block || type->buffer_block)
731 return true;
732 for (unsigned i = 0; i < type->length; i++) {
733 if (vtn_type_contains_block(b, type->members[i]))
734 return true;
735 }
736 return false;
737 default:
738 return false;
739 }
740 }
741
742 /** Returns true if two types are "compatible", i.e. you can do an OpLoad,
743 * OpStore, or OpCopyMemory between them without breaking anything.
744 * Technically, the SPIR-V rules require the exact same type ID but this lets
745 * us internally be a bit looser.
746 */
747 bool
vtn_types_compatible(struct vtn_builder * b,struct vtn_type * t1,struct vtn_type * t2)748 vtn_types_compatible(struct vtn_builder *b,
749 struct vtn_type *t1, struct vtn_type *t2)
750 {
751 if (t1->id == t2->id)
752 return true;
753
754 if (t1->base_type != t2->base_type)
755 return false;
756
757 switch (t1->base_type) {
758 case vtn_base_type_void:
759 case vtn_base_type_scalar:
760 case vtn_base_type_vector:
761 case vtn_base_type_matrix:
762 case vtn_base_type_image:
763 case vtn_base_type_sampler:
764 case vtn_base_type_sampled_image:
765 case vtn_base_type_event:
766 return t1->type == t2->type;
767
768 case vtn_base_type_array:
769 return t1->length == t2->length &&
770 vtn_types_compatible(b, t1->array_element, t2->array_element);
771
772 case vtn_base_type_pointer:
773 return vtn_types_compatible(b, t1->deref, t2->deref);
774
775 case vtn_base_type_struct:
776 if (t1->length != t2->length)
777 return false;
778
779 for (unsigned i = 0; i < t1->length; i++) {
780 if (!vtn_types_compatible(b, t1->members[i], t2->members[i]))
781 return false;
782 }
783 return true;
784
785 case vtn_base_type_accel_struct:
786 return true;
787
788 case vtn_base_type_function:
789 /* This case shouldn't get hit since you can't copy around function
790 * types. Just require them to be identical.
791 */
792 return false;
793 }
794
795 vtn_fail("Invalid base type");
796 }
797
798 struct vtn_type *
vtn_type_without_array(struct vtn_type * type)799 vtn_type_without_array(struct vtn_type *type)
800 {
801 while (type->base_type == vtn_base_type_array)
802 type = type->array_element;
803 return type;
804 }
805
806 /* does a shallow copy of a vtn_type */
807
808 static struct vtn_type *
vtn_type_copy(struct vtn_builder * b,struct vtn_type * src)809 vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
810 {
811 struct vtn_type *dest = ralloc(b, struct vtn_type);
812 *dest = *src;
813
814 switch (src->base_type) {
815 case vtn_base_type_void:
816 case vtn_base_type_scalar:
817 case vtn_base_type_vector:
818 case vtn_base_type_matrix:
819 case vtn_base_type_array:
820 case vtn_base_type_pointer:
821 case vtn_base_type_image:
822 case vtn_base_type_sampler:
823 case vtn_base_type_sampled_image:
824 case vtn_base_type_event:
825 case vtn_base_type_accel_struct:
826 /* Nothing more to do */
827 break;
828
829 case vtn_base_type_struct:
830 dest->members = ralloc_array(b, struct vtn_type *, src->length);
831 memcpy(dest->members, src->members,
832 src->length * sizeof(src->members[0]));
833
834 dest->offsets = ralloc_array(b, unsigned, src->length);
835 memcpy(dest->offsets, src->offsets,
836 src->length * sizeof(src->offsets[0]));
837 break;
838
839 case vtn_base_type_function:
840 dest->params = ralloc_array(b, struct vtn_type *, src->length);
841 memcpy(dest->params, src->params, src->length * sizeof(src->params[0]));
842 break;
843 }
844
845 return dest;
846 }
847
848 static const struct glsl_type *
wrap_type_in_array(const struct glsl_type * type,const struct glsl_type * array_type)849 wrap_type_in_array(const struct glsl_type *type,
850 const struct glsl_type *array_type)
851 {
852 if (!glsl_type_is_array(array_type))
853 return type;
854
855 const struct glsl_type *elem_type =
856 wrap_type_in_array(type, glsl_get_array_element(array_type));
857 return glsl_array_type(elem_type, glsl_get_length(array_type),
858 glsl_get_explicit_stride(array_type));
859 }
860
861 static bool
vtn_type_needs_explicit_layout(struct vtn_builder * b,struct vtn_type * type,enum vtn_variable_mode mode)862 vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type,
863 enum vtn_variable_mode mode)
864 {
865 /* For OpenCL we never want to strip the info from the types, and it makes
866 * type comparisons easier in later stages.
867 */
868 if (b->options->environment == NIR_SPIRV_OPENCL)
869 return true;
870
871 switch (mode) {
872 case vtn_variable_mode_input:
873 case vtn_variable_mode_output:
874 /* Layout decorations kept because we need offsets for XFB arrays of
875 * blocks.
876 */
877 return b->shader->info.has_transform_feedback_varyings;
878
879 case vtn_variable_mode_ssbo:
880 case vtn_variable_mode_phys_ssbo:
881 case vtn_variable_mode_ubo:
882 case vtn_variable_mode_push_constant:
883 case vtn_variable_mode_shader_record:
884 return true;
885
886 case vtn_variable_mode_workgroup:
887 return b->options->caps.workgroup_memory_explicit_layout;
888
889 default:
890 return false;
891 }
892 }
893
894 const struct glsl_type *
vtn_type_get_nir_type(struct vtn_builder * b,struct vtn_type * type,enum vtn_variable_mode mode)895 vtn_type_get_nir_type(struct vtn_builder *b, struct vtn_type *type,
896 enum vtn_variable_mode mode)
897 {
898 if (mode == vtn_variable_mode_atomic_counter) {
899 vtn_fail_if(glsl_without_array(type->type) != glsl_uint_type(),
900 "Variables in the AtomicCounter storage class should be "
901 "(possibly arrays of arrays of) uint.");
902 return wrap_type_in_array(glsl_atomic_uint_type(), type->type);
903 }
904
905 if (mode == vtn_variable_mode_uniform) {
906 switch (type->base_type) {
907 case vtn_base_type_array: {
908 const struct glsl_type *elem_type =
909 vtn_type_get_nir_type(b, type->array_element, mode);
910
911 return glsl_array_type(elem_type, type->length,
912 glsl_get_explicit_stride(type->type));
913 }
914
915 case vtn_base_type_struct: {
916 bool need_new_struct = false;
917 const uint32_t num_fields = type->length;
918 NIR_VLA(struct glsl_struct_field, fields, num_fields);
919 for (unsigned i = 0; i < num_fields; i++) {
920 fields[i] = *glsl_get_struct_field_data(type->type, i);
921 const struct glsl_type *field_nir_type =
922 vtn_type_get_nir_type(b, type->members[i], mode);
923 if (fields[i].type != field_nir_type) {
924 fields[i].type = field_nir_type;
925 need_new_struct = true;
926 }
927 }
928 if (need_new_struct) {
929 if (glsl_type_is_interface(type->type)) {
930 return glsl_interface_type(fields, num_fields,
931 /* packing */ 0, false,
932 glsl_get_type_name(type->type));
933 } else {
934 return glsl_struct_type(fields, num_fields,
935 glsl_get_type_name(type->type),
936 glsl_struct_type_is_packed(type->type));
937 }
938 } else {
939 /* No changes, just pass it on */
940 return type->type;
941 }
942 }
943
944 case vtn_base_type_image:
945 return type->glsl_image;
946
947 case vtn_base_type_sampler:
948 return glsl_bare_sampler_type();
949
950 case vtn_base_type_sampled_image:
951 return type->image->glsl_image;
952
953 default:
954 return type->type;
955 }
956 }
957
958 /* Layout decorations are allowed but ignored in certain conditions,
959 * to allow SPIR-V generators perform type deduplication. Discard
960 * unnecessary ones when passing to NIR.
961 */
962 if (!vtn_type_needs_explicit_layout(b, type, mode))
963 return glsl_get_bare_type(type->type);
964
965 return type->type;
966 }
967
968 static struct vtn_type *
mutable_matrix_member(struct vtn_builder * b,struct vtn_type * type,int member)969 mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
970 {
971 type->members[member] = vtn_type_copy(b, type->members[member]);
972 type = type->members[member];
973
974 /* We may have an array of matrices.... Oh, joy! */
975 while (glsl_type_is_array(type->type)) {
976 type->array_element = vtn_type_copy(b, type->array_element);
977 type = type->array_element;
978 }
979
980 vtn_assert(glsl_type_is_matrix(type->type));
981
982 return type;
983 }
984
985 static void
vtn_handle_access_qualifier(struct vtn_builder * b,struct vtn_type * type,int member,enum gl_access_qualifier access)986 vtn_handle_access_qualifier(struct vtn_builder *b, struct vtn_type *type,
987 int member, enum gl_access_qualifier access)
988 {
989 type->members[member] = vtn_type_copy(b, type->members[member]);
990 type = type->members[member];
991
992 type->access |= access;
993 }
994
995 static void
array_stride_decoration_cb(struct vtn_builder * b,struct vtn_value * val,int member,const struct vtn_decoration * dec,void * void_ctx)996 array_stride_decoration_cb(struct vtn_builder *b,
997 struct vtn_value *val, int member,
998 const struct vtn_decoration *dec, void *void_ctx)
999 {
1000 struct vtn_type *type = val->type;
1001
1002 if (dec->decoration == SpvDecorationArrayStride) {
1003 if (vtn_type_contains_block(b, type)) {
1004 vtn_warn("The ArrayStride decoration cannot be applied to an array "
1005 "type which contains a structure type decorated Block "
1006 "or BufferBlock");
1007 /* Ignore the decoration */
1008 } else {
1009 vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero");
1010 type->stride = dec->operands[0];
1011 }
1012 }
1013 }
1014
1015 static void
struct_member_decoration_cb(struct vtn_builder * b,UNUSED struct vtn_value * val,int member,const struct vtn_decoration * dec,void * void_ctx)1016 struct_member_decoration_cb(struct vtn_builder *b,
1017 UNUSED struct vtn_value *val, int member,
1018 const struct vtn_decoration *dec, void *void_ctx)
1019 {
1020 struct member_decoration_ctx *ctx = void_ctx;
1021
1022 if (member < 0)
1023 return;
1024
1025 assert(member < ctx->num_fields);
1026
1027 switch (dec->decoration) {
1028 case SpvDecorationRelaxedPrecision:
1029 case SpvDecorationUniform:
1030 case SpvDecorationUniformId:
1031 break; /* FIXME: Do nothing with this for now. */
1032 case SpvDecorationNonWritable:
1033 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_WRITEABLE);
1034 break;
1035 case SpvDecorationNonReadable:
1036 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_READABLE);
1037 break;
1038 case SpvDecorationVolatile:
1039 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_VOLATILE);
1040 break;
1041 case SpvDecorationCoherent:
1042 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_COHERENT);
1043 break;
1044 case SpvDecorationNoPerspective:
1045 ctx->fields[member].interpolation = INTERP_MODE_NOPERSPECTIVE;
1046 break;
1047 case SpvDecorationFlat:
1048 ctx->fields[member].interpolation = INTERP_MODE_FLAT;
1049 break;
1050 case SpvDecorationExplicitInterpAMD:
1051 ctx->fields[member].interpolation = INTERP_MODE_EXPLICIT;
1052 break;
1053 case SpvDecorationCentroid:
1054 ctx->fields[member].centroid = true;
1055 break;
1056 case SpvDecorationSample:
1057 ctx->fields[member].sample = true;
1058 break;
1059 case SpvDecorationStream:
1060 /* This is handled later by var_decoration_cb in vtn_variables.c */
1061 break;
1062 case SpvDecorationLocation:
1063 ctx->fields[member].location = dec->operands[0];
1064 break;
1065 case SpvDecorationComponent:
1066 break; /* FIXME: What should we do with these? */
1067 case SpvDecorationBuiltIn:
1068 ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
1069 ctx->type->members[member]->is_builtin = true;
1070 ctx->type->members[member]->builtin = dec->operands[0];
1071 ctx->type->builtin_block = true;
1072 break;
1073 case SpvDecorationOffset:
1074 ctx->type->offsets[member] = dec->operands[0];
1075 ctx->fields[member].offset = dec->operands[0];
1076 break;
1077 case SpvDecorationMatrixStride:
1078 /* Handled as a second pass */
1079 break;
1080 case SpvDecorationColMajor:
1081 break; /* Nothing to do here. Column-major is the default. */
1082 case SpvDecorationRowMajor:
1083 mutable_matrix_member(b, ctx->type, member)->row_major = true;
1084 break;
1085
1086 case SpvDecorationPatch:
1087 case SpvDecorationPerPrimitiveNV:
1088 case SpvDecorationPerTaskNV:
1089 break;
1090
1091 case SpvDecorationSpecId:
1092 case SpvDecorationBlock:
1093 case SpvDecorationBufferBlock:
1094 case SpvDecorationArrayStride:
1095 case SpvDecorationGLSLShared:
1096 case SpvDecorationGLSLPacked:
1097 case SpvDecorationInvariant:
1098 case SpvDecorationRestrict:
1099 case SpvDecorationAliased:
1100 case SpvDecorationConstant:
1101 case SpvDecorationIndex:
1102 case SpvDecorationBinding:
1103 case SpvDecorationDescriptorSet:
1104 case SpvDecorationLinkageAttributes:
1105 case SpvDecorationNoContraction:
1106 case SpvDecorationInputAttachmentIndex:
1107 case SpvDecorationCPacked:
1108 vtn_warn("Decoration not allowed on struct members: %s",
1109 spirv_decoration_to_string(dec->decoration));
1110 break;
1111
1112 case SpvDecorationXfbBuffer:
1113 case SpvDecorationXfbStride:
1114 /* This is handled later by var_decoration_cb in vtn_variables.c */
1115 break;
1116
1117 case SpvDecorationSaturatedConversion:
1118 case SpvDecorationFuncParamAttr:
1119 case SpvDecorationFPRoundingMode:
1120 case SpvDecorationFPFastMathMode:
1121 case SpvDecorationAlignment:
1122 if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1123 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1124 spirv_decoration_to_string(dec->decoration));
1125 }
1126 break;
1127
1128 case SpvDecorationUserSemantic:
1129 case SpvDecorationUserTypeGOOGLE:
1130 /* User semantic decorations can safely be ignored by the driver. */
1131 break;
1132
1133 case SpvDecorationPerViewNV:
1134 /* TODO(mesh): Handle multiview. */
1135 vtn_warn("Mesh multiview not yet supported. Needed for decoration PerViewNV.");
1136 break;
1137
1138 default:
1139 vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1140 }
1141 }
1142
1143 /** Chases the array type all the way down to the tail and rewrites the
1144 * glsl_types to be based off the tail's glsl_type.
1145 */
1146 static void
vtn_array_type_rewrite_glsl_type(struct vtn_type * type)1147 vtn_array_type_rewrite_glsl_type(struct vtn_type *type)
1148 {
1149 if (type->base_type != vtn_base_type_array)
1150 return;
1151
1152 vtn_array_type_rewrite_glsl_type(type->array_element);
1153
1154 type->type = glsl_array_type(type->array_element->type,
1155 type->length, type->stride);
1156 }
1157
1158 /* Matrix strides are handled as a separate pass because we need to know
1159 * whether the matrix is row-major or not first.
1160 */
1161 static void
struct_member_matrix_stride_cb(struct vtn_builder * b,UNUSED struct vtn_value * val,int member,const struct vtn_decoration * dec,void * void_ctx)1162 struct_member_matrix_stride_cb(struct vtn_builder *b,
1163 UNUSED struct vtn_value *val, int member,
1164 const struct vtn_decoration *dec,
1165 void *void_ctx)
1166 {
1167 if (dec->decoration != SpvDecorationMatrixStride)
1168 return;
1169
1170 vtn_fail_if(member < 0,
1171 "The MatrixStride decoration is only allowed on members "
1172 "of OpTypeStruct");
1173 vtn_fail_if(dec->operands[0] == 0, "MatrixStride must be non-zero");
1174
1175 struct member_decoration_ctx *ctx = void_ctx;
1176
1177 struct vtn_type *mat_type = mutable_matrix_member(b, ctx->type, member);
1178 if (mat_type->row_major) {
1179 mat_type->array_element = vtn_type_copy(b, mat_type->array_element);
1180 mat_type->stride = mat_type->array_element->stride;
1181 mat_type->array_element->stride = dec->operands[0];
1182
1183 mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1184 dec->operands[0], true);
1185 mat_type->array_element->type = glsl_get_column_type(mat_type->type);
1186 } else {
1187 vtn_assert(mat_type->array_element->stride > 0);
1188 mat_type->stride = dec->operands[0];
1189
1190 mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1191 dec->operands[0], false);
1192 }
1193
1194 /* Now that we've replaced the glsl_type with a properly strided matrix
1195 * type, rewrite the member type so that it's an array of the proper kind
1196 * of glsl_type.
1197 */
1198 vtn_array_type_rewrite_glsl_type(ctx->type->members[member]);
1199 ctx->fields[member].type = ctx->type->members[member]->type;
1200 }
1201
1202 static void
struct_packed_decoration_cb(struct vtn_builder * b,struct vtn_value * val,int member,const struct vtn_decoration * dec,void * void_ctx)1203 struct_packed_decoration_cb(struct vtn_builder *b,
1204 struct vtn_value *val, int member,
1205 const struct vtn_decoration *dec, void *void_ctx)
1206 {
1207 vtn_assert(val->type->base_type == vtn_base_type_struct);
1208 if (dec->decoration == SpvDecorationCPacked) {
1209 if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1210 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1211 spirv_decoration_to_string(dec->decoration));
1212 }
1213 val->type->packed = true;
1214 }
1215 }
1216
1217 static void
struct_block_decoration_cb(struct vtn_builder * b,struct vtn_value * val,int member,const struct vtn_decoration * dec,void * ctx)1218 struct_block_decoration_cb(struct vtn_builder *b,
1219 struct vtn_value *val, int member,
1220 const struct vtn_decoration *dec, void *ctx)
1221 {
1222 if (member != -1)
1223 return;
1224
1225 struct vtn_type *type = val->type;
1226 if (dec->decoration == SpvDecorationBlock)
1227 type->block = true;
1228 else if (dec->decoration == SpvDecorationBufferBlock)
1229 type->buffer_block = true;
1230 }
1231
1232 static void
type_decoration_cb(struct vtn_builder * b,struct vtn_value * val,int member,const struct vtn_decoration * dec,UNUSED void * ctx)1233 type_decoration_cb(struct vtn_builder *b,
1234 struct vtn_value *val, int member,
1235 const struct vtn_decoration *dec, UNUSED void *ctx)
1236 {
1237 struct vtn_type *type = val->type;
1238
1239 if (member != -1) {
1240 /* This should have been handled by OpTypeStruct */
1241 assert(val->type->base_type == vtn_base_type_struct);
1242 assert(member >= 0 && member < val->type->length);
1243 return;
1244 }
1245
1246 switch (dec->decoration) {
1247 case SpvDecorationArrayStride:
1248 vtn_assert(type->base_type == vtn_base_type_array ||
1249 type->base_type == vtn_base_type_pointer);
1250 break;
1251 case SpvDecorationBlock:
1252 vtn_assert(type->base_type == vtn_base_type_struct);
1253 vtn_assert(type->block);
1254 break;
1255 case SpvDecorationBufferBlock:
1256 vtn_assert(type->base_type == vtn_base_type_struct);
1257 vtn_assert(type->buffer_block);
1258 break;
1259 case SpvDecorationGLSLShared:
1260 case SpvDecorationGLSLPacked:
1261 /* Ignore these, since we get explicit offsets anyways */
1262 break;
1263
1264 case SpvDecorationRowMajor:
1265 case SpvDecorationColMajor:
1266 case SpvDecorationMatrixStride:
1267 case SpvDecorationBuiltIn:
1268 case SpvDecorationNoPerspective:
1269 case SpvDecorationFlat:
1270 case SpvDecorationPatch:
1271 case SpvDecorationCentroid:
1272 case SpvDecorationSample:
1273 case SpvDecorationExplicitInterpAMD:
1274 case SpvDecorationVolatile:
1275 case SpvDecorationCoherent:
1276 case SpvDecorationNonWritable:
1277 case SpvDecorationNonReadable:
1278 case SpvDecorationUniform:
1279 case SpvDecorationUniformId:
1280 case SpvDecorationLocation:
1281 case SpvDecorationComponent:
1282 case SpvDecorationOffset:
1283 case SpvDecorationXfbBuffer:
1284 case SpvDecorationXfbStride:
1285 case SpvDecorationUserSemantic:
1286 vtn_warn("Decoration only allowed for struct members: %s",
1287 spirv_decoration_to_string(dec->decoration));
1288 break;
1289
1290 case SpvDecorationStream:
1291 /* We don't need to do anything here, as stream is filled up when
1292 * aplying the decoration to a variable, just check that if it is not a
1293 * struct member, it should be a struct.
1294 */
1295 vtn_assert(type->base_type == vtn_base_type_struct);
1296 break;
1297
1298 case SpvDecorationRelaxedPrecision:
1299 case SpvDecorationSpecId:
1300 case SpvDecorationInvariant:
1301 case SpvDecorationRestrict:
1302 case SpvDecorationAliased:
1303 case SpvDecorationConstant:
1304 case SpvDecorationIndex:
1305 case SpvDecorationBinding:
1306 case SpvDecorationDescriptorSet:
1307 case SpvDecorationLinkageAttributes:
1308 case SpvDecorationNoContraction:
1309 case SpvDecorationInputAttachmentIndex:
1310 vtn_warn("Decoration not allowed on types: %s",
1311 spirv_decoration_to_string(dec->decoration));
1312 break;
1313
1314 case SpvDecorationCPacked:
1315 /* Handled when parsing a struct type, nothing to do here. */
1316 break;
1317
1318 case SpvDecorationSaturatedConversion:
1319 case SpvDecorationFuncParamAttr:
1320 case SpvDecorationFPRoundingMode:
1321 case SpvDecorationFPFastMathMode:
1322 case SpvDecorationAlignment:
1323 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1324 spirv_decoration_to_string(dec->decoration));
1325 break;
1326
1327 case SpvDecorationUserTypeGOOGLE:
1328 /* User semantic decorations can safely be ignored by the driver. */
1329 break;
1330
1331 default:
1332 vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1333 }
1334 }
1335
1336 static unsigned
translate_image_format(struct vtn_builder * b,SpvImageFormat format)1337 translate_image_format(struct vtn_builder *b, SpvImageFormat format)
1338 {
1339 switch (format) {
1340 case SpvImageFormatUnknown: return PIPE_FORMAT_NONE;
1341 case SpvImageFormatRgba32f: return PIPE_FORMAT_R32G32B32A32_FLOAT;
1342 case SpvImageFormatRgba16f: return PIPE_FORMAT_R16G16B16A16_FLOAT;
1343 case SpvImageFormatR32f: return PIPE_FORMAT_R32_FLOAT;
1344 case SpvImageFormatRgba8: return PIPE_FORMAT_R8G8B8A8_UNORM;
1345 case SpvImageFormatRgba8Snorm: return PIPE_FORMAT_R8G8B8A8_SNORM;
1346 case SpvImageFormatRg32f: return PIPE_FORMAT_R32G32_FLOAT;
1347 case SpvImageFormatRg16f: return PIPE_FORMAT_R16G16_FLOAT;
1348 case SpvImageFormatR11fG11fB10f: return PIPE_FORMAT_R11G11B10_FLOAT;
1349 case SpvImageFormatR16f: return PIPE_FORMAT_R16_FLOAT;
1350 case SpvImageFormatRgba16: return PIPE_FORMAT_R16G16B16A16_UNORM;
1351 case SpvImageFormatRgb10A2: return PIPE_FORMAT_R10G10B10A2_UNORM;
1352 case SpvImageFormatRg16: return PIPE_FORMAT_R16G16_UNORM;
1353 case SpvImageFormatRg8: return PIPE_FORMAT_R8G8_UNORM;
1354 case SpvImageFormatR16: return PIPE_FORMAT_R16_UNORM;
1355 case SpvImageFormatR8: return PIPE_FORMAT_R8_UNORM;
1356 case SpvImageFormatRgba16Snorm: return PIPE_FORMAT_R16G16B16A16_SNORM;
1357 case SpvImageFormatRg16Snorm: return PIPE_FORMAT_R16G16_SNORM;
1358 case SpvImageFormatRg8Snorm: return PIPE_FORMAT_R8G8_SNORM;
1359 case SpvImageFormatR16Snorm: return PIPE_FORMAT_R16_SNORM;
1360 case SpvImageFormatR8Snorm: return PIPE_FORMAT_R8_SNORM;
1361 case SpvImageFormatRgba32i: return PIPE_FORMAT_R32G32B32A32_SINT;
1362 case SpvImageFormatRgba16i: return PIPE_FORMAT_R16G16B16A16_SINT;
1363 case SpvImageFormatRgba8i: return PIPE_FORMAT_R8G8B8A8_SINT;
1364 case SpvImageFormatR32i: return PIPE_FORMAT_R32_SINT;
1365 case SpvImageFormatRg32i: return PIPE_FORMAT_R32G32_SINT;
1366 case SpvImageFormatRg16i: return PIPE_FORMAT_R16G16_SINT;
1367 case SpvImageFormatRg8i: return PIPE_FORMAT_R8G8_SINT;
1368 case SpvImageFormatR16i: return PIPE_FORMAT_R16_SINT;
1369 case SpvImageFormatR8i: return PIPE_FORMAT_R8_SINT;
1370 case SpvImageFormatRgba32ui: return PIPE_FORMAT_R32G32B32A32_UINT;
1371 case SpvImageFormatRgba16ui: return PIPE_FORMAT_R16G16B16A16_UINT;
1372 case SpvImageFormatRgba8ui: return PIPE_FORMAT_R8G8B8A8_UINT;
1373 case SpvImageFormatR32ui: return PIPE_FORMAT_R32_UINT;
1374 case SpvImageFormatRgb10a2ui: return PIPE_FORMAT_R10G10B10A2_UINT;
1375 case SpvImageFormatRg32ui: return PIPE_FORMAT_R32G32_UINT;
1376 case SpvImageFormatRg16ui: return PIPE_FORMAT_R16G16_UINT;
1377 case SpvImageFormatRg8ui: return PIPE_FORMAT_R8G8_UINT;
1378 case SpvImageFormatR16ui: return PIPE_FORMAT_R16_UINT;
1379 case SpvImageFormatR8ui: return PIPE_FORMAT_R8_UINT;
1380 case SpvImageFormatR64ui: return PIPE_FORMAT_R64_UINT;
1381 case SpvImageFormatR64i: return PIPE_FORMAT_R64_SINT;
1382 default:
1383 vtn_fail("Invalid image format: %s (%u)",
1384 spirv_imageformat_to_string(format), format);
1385 }
1386 }
1387
1388 static void
vtn_handle_type(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)1389 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
1390 const uint32_t *w, unsigned count)
1391 {
1392 struct vtn_value *val = NULL;
1393
1394 /* In order to properly handle forward declarations, we have to defer
1395 * allocation for pointer types.
1396 */
1397 if (opcode != SpvOpTypePointer && opcode != SpvOpTypeForwardPointer) {
1398 val = vtn_push_value(b, w[1], vtn_value_type_type);
1399 vtn_fail_if(val->type != NULL,
1400 "Only pointers can have forward declarations");
1401 val->type = rzalloc(b, struct vtn_type);
1402 val->type->id = w[1];
1403 }
1404
1405 switch (opcode) {
1406 case SpvOpTypeVoid:
1407 val->type->base_type = vtn_base_type_void;
1408 val->type->type = glsl_void_type();
1409 break;
1410 case SpvOpTypeBool:
1411 val->type->base_type = vtn_base_type_scalar;
1412 val->type->type = glsl_bool_type();
1413 val->type->length = 1;
1414 break;
1415 case SpvOpTypeInt: {
1416 int bit_size = w[2];
1417 const bool signedness = w[3];
1418 vtn_fail_if(bit_size != 8 && bit_size != 16 &&
1419 bit_size != 32 && bit_size != 64,
1420 "Invalid int bit size: %u", bit_size);
1421 val->type->base_type = vtn_base_type_scalar;
1422 val->type->type = signedness ? glsl_intN_t_type(bit_size) :
1423 glsl_uintN_t_type(bit_size);
1424 val->type->length = 1;
1425 break;
1426 }
1427
1428 case SpvOpTypeFloat: {
1429 int bit_size = w[2];
1430 val->type->base_type = vtn_base_type_scalar;
1431 vtn_fail_if(bit_size != 16 && bit_size != 32 && bit_size != 64,
1432 "Invalid float bit size: %u", bit_size);
1433 val->type->type = glsl_floatN_t_type(bit_size);
1434 val->type->length = 1;
1435 break;
1436 }
1437
1438 case SpvOpTypeVector: {
1439 struct vtn_type *base = vtn_get_type(b, w[2]);
1440 unsigned elems = w[3];
1441
1442 vtn_fail_if(base->base_type != vtn_base_type_scalar,
1443 "Base type for OpTypeVector must be a scalar");
1444 vtn_fail_if((elems < 2 || elems > 4) && (elems != 8) && (elems != 16),
1445 "Invalid component count for OpTypeVector");
1446
1447 val->type->base_type = vtn_base_type_vector;
1448 val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
1449 val->type->length = elems;
1450 val->type->stride = glsl_type_is_boolean(val->type->type)
1451 ? 4 : glsl_get_bit_size(base->type) / 8;
1452 val->type->array_element = base;
1453 break;
1454 }
1455
1456 case SpvOpTypeMatrix: {
1457 struct vtn_type *base = vtn_get_type(b, w[2]);
1458 unsigned columns = w[3];
1459
1460 vtn_fail_if(base->base_type != vtn_base_type_vector,
1461 "Base type for OpTypeMatrix must be a vector");
1462 vtn_fail_if(columns < 2 || columns > 4,
1463 "Invalid column count for OpTypeMatrix");
1464
1465 val->type->base_type = vtn_base_type_matrix;
1466 val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
1467 glsl_get_vector_elements(base->type),
1468 columns);
1469 vtn_fail_if(glsl_type_is_error(val->type->type),
1470 "Unsupported base type for OpTypeMatrix");
1471 assert(!glsl_type_is_error(val->type->type));
1472 val->type->length = columns;
1473 val->type->array_element = base;
1474 val->type->row_major = false;
1475 val->type->stride = 0;
1476 break;
1477 }
1478
1479 case SpvOpTypeRuntimeArray:
1480 case SpvOpTypeArray: {
1481 struct vtn_type *array_element = vtn_get_type(b, w[2]);
1482
1483 if (opcode == SpvOpTypeRuntimeArray) {
1484 /* A length of 0 is used to denote unsized arrays */
1485 val->type->length = 0;
1486 } else {
1487 val->type->length = vtn_constant_uint(b, w[3]);
1488 }
1489
1490 val->type->base_type = vtn_base_type_array;
1491 val->type->array_element = array_element;
1492
1493 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1494 val->type->type = glsl_array_type(array_element->type, val->type->length,
1495 val->type->stride);
1496 break;
1497 }
1498
1499 case SpvOpTypeStruct: {
1500 unsigned num_fields = count - 2;
1501 val->type->base_type = vtn_base_type_struct;
1502 val->type->length = num_fields;
1503 val->type->members = ralloc_array(b, struct vtn_type *, num_fields);
1504 val->type->offsets = ralloc_array(b, unsigned, num_fields);
1505 val->type->packed = false;
1506
1507 NIR_VLA(struct glsl_struct_field, fields, count);
1508 for (unsigned i = 0; i < num_fields; i++) {
1509 val->type->members[i] = vtn_get_type(b, w[i + 2]);
1510 fields[i] = (struct glsl_struct_field) {
1511 .type = val->type->members[i]->type,
1512 .name = ralloc_asprintf(b, "field%d", i),
1513 .location = -1,
1514 .offset = -1,
1515 };
1516 }
1517
1518 vtn_foreach_decoration(b, val, struct_packed_decoration_cb, NULL);
1519
1520 struct member_decoration_ctx ctx = {
1521 .num_fields = num_fields,
1522 .fields = fields,
1523 .type = val->type
1524 };
1525
1526 vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
1527
1528 /* Propagate access specifiers that are present on all members to the overall type */
1529 enum gl_access_qualifier overall_access = ACCESS_COHERENT | ACCESS_VOLATILE |
1530 ACCESS_NON_READABLE | ACCESS_NON_WRITEABLE;
1531 for (unsigned i = 0; i < num_fields; ++i)
1532 overall_access &= val->type->members[i]->access;
1533 val->type->access = overall_access;
1534
1535 vtn_foreach_decoration(b, val, struct_member_matrix_stride_cb, &ctx);
1536
1537 vtn_foreach_decoration(b, val, struct_block_decoration_cb, NULL);
1538
1539 const char *name = val->name;
1540
1541 if (val->type->block || val->type->buffer_block) {
1542 /* Packing will be ignored since types coming from SPIR-V are
1543 * explicitly laid out.
1544 */
1545 val->type->type = glsl_interface_type(fields, num_fields,
1546 /* packing */ 0, false,
1547 name ? name : "block");
1548 } else {
1549 val->type->type = glsl_struct_type(fields, num_fields,
1550 name ? name : "struct",
1551 val->type->packed);
1552 }
1553 break;
1554 }
1555
1556 case SpvOpTypeFunction: {
1557 val->type->base_type = vtn_base_type_function;
1558 val->type->type = NULL;
1559
1560 val->type->return_type = vtn_get_type(b, w[2]);
1561
1562 const unsigned num_params = count - 3;
1563 val->type->length = num_params;
1564 val->type->params = ralloc_array(b, struct vtn_type *, num_params);
1565 for (unsigned i = 0; i < count - 3; i++) {
1566 val->type->params[i] = vtn_get_type(b, w[i + 3]);
1567 }
1568 break;
1569 }
1570
1571 case SpvOpTypePointer:
1572 case SpvOpTypeForwardPointer: {
1573 /* We can't blindly push the value because it might be a forward
1574 * declaration.
1575 */
1576 val = vtn_untyped_value(b, w[1]);
1577
1578 SpvStorageClass storage_class = w[2];
1579
1580 vtn_fail_if(opcode == SpvOpTypeForwardPointer &&
1581 b->shader->info.stage != MESA_SHADER_KERNEL &&
1582 storage_class != SpvStorageClassPhysicalStorageBuffer,
1583 "OpTypeForwardPointer is only allowed in Vulkan with "
1584 "the PhysicalStorageBuffer storage class");
1585
1586 struct vtn_type *deref_type = NULL;
1587 if (opcode == SpvOpTypePointer)
1588 deref_type = vtn_get_type(b, w[3]);
1589
1590 if (val->value_type == vtn_value_type_invalid) {
1591 val->value_type = vtn_value_type_type;
1592 val->type = rzalloc(b, struct vtn_type);
1593 val->type->id = w[1];
1594 val->type->base_type = vtn_base_type_pointer;
1595 val->type->storage_class = storage_class;
1596
1597 /* These can actually be stored to nir_variables and used as SSA
1598 * values so they need a real glsl_type.
1599 */
1600 enum vtn_variable_mode mode = vtn_storage_class_to_mode(
1601 b, storage_class, deref_type, NULL);
1602
1603 /* The deref type should only matter for the UniformConstant storage
1604 * class. In particular, it should never matter for any storage
1605 * classes that are allowed in combination with OpTypeForwardPointer.
1606 */
1607 if (storage_class != SpvStorageClassUniform &&
1608 storage_class != SpvStorageClassUniformConstant) {
1609 assert(mode == vtn_storage_class_to_mode(b, storage_class,
1610 NULL, NULL));
1611 }
1612
1613 val->type->type = nir_address_format_to_glsl_type(
1614 vtn_mode_to_address_format(b, mode));
1615 } else {
1616 vtn_fail_if(val->type->storage_class != storage_class,
1617 "The storage classes of an OpTypePointer and any "
1618 "OpTypeForwardPointers that provide forward "
1619 "declarations of it must match.");
1620 }
1621
1622 if (opcode == SpvOpTypePointer) {
1623 vtn_fail_if(val->type->deref != NULL,
1624 "While OpTypeForwardPointer can be used to provide a "
1625 "forward declaration of a pointer, OpTypePointer can "
1626 "only be used once for a given id.");
1627
1628 val->type->deref = deref_type;
1629
1630 /* Only certain storage classes use ArrayStride. */
1631 switch (storage_class) {
1632 case SpvStorageClassWorkgroup:
1633 if (!b->options->caps.workgroup_memory_explicit_layout)
1634 break;
1635 FALLTHROUGH;
1636
1637 case SpvStorageClassUniform:
1638 case SpvStorageClassPushConstant:
1639 case SpvStorageClassStorageBuffer:
1640 case SpvStorageClassPhysicalStorageBuffer:
1641 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1642 break;
1643
1644 default:
1645 /* Nothing to do. */
1646 break;
1647 }
1648 }
1649 break;
1650 }
1651
1652 case SpvOpTypeImage: {
1653 val->type->base_type = vtn_base_type_image;
1654
1655 /* Images are represented in NIR as a scalar SSA value that is the
1656 * result of a deref instruction. An OpLoad on an OpTypeImage pointer
1657 * from UniformConstant memory just takes the NIR deref from the pointer
1658 * and turns it into an SSA value.
1659 */
1660 val->type->type = nir_address_format_to_glsl_type(
1661 vtn_mode_to_address_format(b, vtn_variable_mode_function));
1662
1663 const struct vtn_type *sampled_type = vtn_get_type(b, w[2]);
1664 if (b->shader->info.stage == MESA_SHADER_KERNEL) {
1665 vtn_fail_if(sampled_type->base_type != vtn_base_type_void,
1666 "Sampled type of OpTypeImage must be void for kernels");
1667 } else {
1668 vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar,
1669 "Sampled type of OpTypeImage must be a scalar");
1670 if (b->options->caps.image_atomic_int64) {
1671 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32 &&
1672 glsl_get_bit_size(sampled_type->type) != 64,
1673 "Sampled type of OpTypeImage must be a 32 or 64-bit "
1674 "scalar");
1675 } else {
1676 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32,
1677 "Sampled type of OpTypeImage must be a 32-bit scalar");
1678 }
1679 }
1680
1681 enum glsl_sampler_dim dim;
1682 switch ((SpvDim)w[3]) {
1683 case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break;
1684 case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break;
1685 case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break;
1686 case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break;
1687 case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break;
1688 case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break;
1689 case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break;
1690 default:
1691 vtn_fail("Invalid SPIR-V image dimensionality: %s (%u)",
1692 spirv_dim_to_string((SpvDim)w[3]), w[3]);
1693 }
1694
1695 /* w[4]: as per Vulkan spec "Validation Rules within a Module",
1696 * The “Depth” operand of OpTypeImage is ignored.
1697 */
1698 bool is_array = w[5];
1699 bool multisampled = w[6];
1700 unsigned sampled = w[7];
1701 SpvImageFormat format = w[8];
1702
1703 if (count > 9)
1704 val->type->access_qualifier = w[9];
1705 else if (b->shader->info.stage == MESA_SHADER_KERNEL)
1706 /* Per the CL C spec: If no qualifier is provided, read_only is assumed. */
1707 val->type->access_qualifier = SpvAccessQualifierReadOnly;
1708 else
1709 val->type->access_qualifier = SpvAccessQualifierReadWrite;
1710
1711 if (multisampled) {
1712 if (dim == GLSL_SAMPLER_DIM_2D)
1713 dim = GLSL_SAMPLER_DIM_MS;
1714 else if (dim == GLSL_SAMPLER_DIM_SUBPASS)
1715 dim = GLSL_SAMPLER_DIM_SUBPASS_MS;
1716 else
1717 vtn_fail("Unsupported multisampled image type");
1718 }
1719
1720 val->type->image_format = translate_image_format(b, format);
1721
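   /* Descriptive note: per the SPIR-V spec, Sampled == 1 means the image is
    * used together with a sampler, 2 means it is used without one (a storage
    * image), and 0 means the usage is only known at run time, which is only
    * handled here for kernels.
    */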
1722 enum glsl_base_type sampled_base_type =
1723 glsl_get_base_type(sampled_type->type);
1724 if (sampled == 1) {
1725 val->type->glsl_image = glsl_sampler_type(dim, false, is_array,
1726 sampled_base_type);
1727 } else if (sampled == 2) {
1728 val->type->glsl_image = glsl_image_type(dim, is_array,
1729 sampled_base_type);
1730 } else if (b->shader->info.stage == MESA_SHADER_KERNEL) {
1731 val->type->glsl_image = glsl_image_type(dim, is_array,
1732 GLSL_TYPE_VOID);
1733 } else {
1734 vtn_fail("We need to know if the image will be sampled");
1735 }
1736 break;
1737 }
1738
1739 case SpvOpTypeSampledImage: {
1740 val->type->base_type = vtn_base_type_sampled_image;
1741 val->type->image = vtn_get_type(b, w[2]);
1742
1743 /* Sampled images are represented in NIR as a vec2 SSA value where each
1744 * component is the result of a deref instruction. The first component
1745 * is the image and the second is the sampler. An OpLoad on an
1746 * OpTypeSampledImage pointer from UniformConstant memory just takes
1747 * the NIR deref from the pointer and duplicates it to both vector
1748 * components.
1749 */
1750 nir_address_format addr_format =
1751 vtn_mode_to_address_format(b, vtn_variable_mode_function);
1752 assert(nir_address_format_num_components(addr_format) == 1);
1753 unsigned bit_size = nir_address_format_bit_size(addr_format);
1754 assert(bit_size == 32 || bit_size == 64);
1755
1756 enum glsl_base_type base_type =
1757 bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64;
1758 val->type->type = glsl_vector_type(base_type, 2);
1759 break;
1760 }
1761
1762 case SpvOpTypeSampler:
1763 val->type->base_type = vtn_base_type_sampler;
1764
1765 /* Samplers are represented in NIR as a scalar SSA value that is the
1766 * result of a deref instruction. An OpLoad on an OpTypeSampler pointer
1767 * from UniformConstant memory just takes the NIR deref from the pointer
1768 * and turns it into an SSA value.
1769 */
1770 val->type->type = nir_address_format_to_glsl_type(
1771 vtn_mode_to_address_format(b, vtn_variable_mode_function));
1772 break;
1773
1774 case SpvOpTypeAccelerationStructureKHR:
1775 val->type->base_type = vtn_base_type_accel_struct;
1776 val->type->type = glsl_uint64_t_type();
1777 break;
1778
1779 case SpvOpTypeOpaque:
1780 val->type->base_type = vtn_base_type_struct;
1781 const char *name = vtn_string_literal(b, &w[2], count - 2, NULL);
1782 val->type->type = glsl_struct_type(NULL, 0, name, false);
1783 break;
1784
1785 case SpvOpTypeEvent:
1786 val->type->base_type = vtn_base_type_event;
1787 val->type->type = glsl_int_type();
1788 break;
1789
1790 case SpvOpTypeDeviceEvent:
1791 case SpvOpTypeReserveId:
1792 case SpvOpTypeQueue:
1793 case SpvOpTypePipe:
1794 default:
1795 vtn_fail_with_opcode("Unhandled opcode", opcode);
1796 }
1797
1798 vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
1799
1800 if (val->type->base_type == vtn_base_type_struct &&
1801 (val->type->block || val->type->buffer_block)) {
1802 for (unsigned i = 0; i < val->type->length; i++) {
1803 vtn_fail_if(vtn_type_contains_block(b, val->type->members[i]),
1804 "Block and BufferBlock decorations cannot decorate a "
1805 "structure type that is nested at any level inside "
1806 "another structure type decorated with Block or "
1807 "BufferBlock.");
1808 }
1809 }
1810 }
1811
1812 static nir_constant *
1813 vtn_null_constant(struct vtn_builder *b, struct vtn_type *type)
1814 {
1815 nir_constant *c = rzalloc(b, nir_constant);
1816
1817 switch (type->base_type) {
1818 case vtn_base_type_scalar:
1819 case vtn_base_type_vector:
1820 /* Nothing to do here. It's already initialized to zero */
1821 break;
1822
1823 case vtn_base_type_pointer: {
1824 enum vtn_variable_mode mode = vtn_storage_class_to_mode(
1825 b, type->storage_class, type->deref, NULL);
1826 nir_address_format addr_format = vtn_mode_to_address_format(b, mode);
1827
1828 const nir_const_value *null_value = nir_address_format_null_value(addr_format);
1829 memcpy(c->values, null_value,
1830 sizeof(nir_const_value) * nir_address_format_num_components(addr_format));
1831 break;
1832 }
1833
1834 case vtn_base_type_void:
1835 case vtn_base_type_image:
1836 case vtn_base_type_sampler:
1837 case vtn_base_type_sampled_image:
1838 case vtn_base_type_function:
1839 case vtn_base_type_event:
1840 /* For those we have to return something but it doesn't matter what. */
1841 break;
1842
1843 case vtn_base_type_matrix:
1844 case vtn_base_type_array:
1845 vtn_assert(type->length > 0);
1846 c->num_elements = type->length;
1847 c->elements = ralloc_array(b, nir_constant *, c->num_elements);
1848
1849 c->elements[0] = vtn_null_constant(b, type->array_element);
1850 for (unsigned i = 1; i < c->num_elements; i++)
1851 c->elements[i] = c->elements[0];
1852 break;
1853
1854 case vtn_base_type_struct:
1855 c->num_elements = type->length;
1856 c->elements = ralloc_array(b, nir_constant *, c->num_elements);
1857 for (unsigned i = 0; i < c->num_elements; i++)
1858 c->elements[i] = vtn_null_constant(b, type->members[i]);
1859 break;
1860
1861 default:
1862 vtn_fail("Invalid type for null constant");
1863 }
1864
1865 return c;
1866 }
1867
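/* Descriptive note: when a (spec) constant carries a SpecId decoration and
 * the client supplied a specialization entry with a matching id, the value
 * is overridden with the client-provided data; otherwise the default
 * encoded in the SPIR-V module is kept.
 */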
1868 static void
1869 spec_constant_decoration_cb(struct vtn_builder *b, UNUSED struct vtn_value *val,
1870 ASSERTED int member,
1871 const struct vtn_decoration *dec, void *data)
1872 {
1873 vtn_assert(member == -1);
1874 if (dec->decoration != SpvDecorationSpecId)
1875 return;
1876
1877 nir_const_value *value = data;
1878 for (unsigned i = 0; i < b->num_specializations; i++) {
1879 if (b->specializations[i].id == dec->operands[0]) {
1880 *value = b->specializations[i].value;
1881 return;
1882 }
1883 }
1884 }
1885
1886 static void
1887 handle_workgroup_size_decoration_cb(struct vtn_builder *b,
1888 struct vtn_value *val,
1889 ASSERTED int member,
1890 const struct vtn_decoration *dec,
1891 UNUSED void *data)
1892 {
1893 vtn_assert(member == -1);
1894 if (dec->decoration != SpvDecorationBuiltIn ||
1895 dec->operands[0] != SpvBuiltInWorkgroupSize)
1896 return;
1897
1898 vtn_assert(val->type->type == glsl_vector_type(GLSL_TYPE_UINT, 3));
1899 b->workgroup_size_builtin = val;
1900 }
1901
1902 static void
1903 vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
1904 const uint32_t *w, unsigned count)
1905 {
1906 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
1907 val->constant = rzalloc(b, nir_constant);
1908 switch (opcode) {
1909 case SpvOpConstantTrue:
1910 case SpvOpConstantFalse:
1911 case SpvOpSpecConstantTrue:
1912 case SpvOpSpecConstantFalse: {
1913 vtn_fail_if(val->type->type != glsl_bool_type(),
1914 "Result type of %s must be OpTypeBool",
1915 spirv_op_to_string(opcode));
1916
1917 bool bval = (opcode == SpvOpConstantTrue ||
1918 opcode == SpvOpSpecConstantTrue);
1919
1920 nir_const_value u32val = nir_const_value_for_uint(bval, 32);
1921
1922 if (opcode == SpvOpSpecConstantTrue ||
1923 opcode == SpvOpSpecConstantFalse)
1924 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32val);
1925
1926 val->constant->values[0].b = u32val.u32 != 0;
1927 break;
1928 }
1929
1930 case SpvOpConstant:
1931 case SpvOpSpecConstant: {
1932 vtn_fail_if(val->type->base_type != vtn_base_type_scalar,
1933 "Result type of %s must be a scalar",
1934 spirv_op_to_string(opcode));
1935 int bit_size = glsl_get_bit_size(val->type->type);
1936 switch (bit_size) {
1937 case 64:
1938 val->constant->values[0].u64 = vtn_u64_literal(&w[3]);
1939 break;
1940 case 32:
1941 val->constant->values[0].u32 = w[3];
1942 break;
1943 case 16:
1944 val->constant->values[0].u16 = w[3];
1945 break;
1946 case 8:
1947 val->constant->values[0].u8 = w[3];
1948 break;
1949 default:
1950 vtn_fail("Unsupported SpvOpConstant bit size: %u", bit_size);
1951 }
1952
1953 if (opcode == SpvOpSpecConstant)
1954 vtn_foreach_decoration(b, val, spec_constant_decoration_cb,
1955 &val->constant->values[0]);
1956 break;
1957 }
1958
1959 case SpvOpSpecConstantComposite:
1960 case SpvOpConstantComposite: {
1961 unsigned elem_count = count - 3;
1962 vtn_fail_if(elem_count != val->type->length,
1963 "%s has %u constituents, expected %u",
1964 spirv_op_to_string(opcode), elem_count, val->type->length);
1965
1966 nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
1967 val->is_undef_constant = true;
1968 for (unsigned i = 0; i < elem_count; i++) {
1969 struct vtn_value *elem_val = vtn_untyped_value(b, w[i + 3]);
1970
1971 if (elem_val->value_type == vtn_value_type_constant) {
1972 elems[i] = elem_val->constant;
1973 val->is_undef_constant = val->is_undef_constant &&
1974 elem_val->is_undef_constant;
1975 } else {
1976 vtn_fail_if(elem_val->value_type != vtn_value_type_undef,
1977 "only constants or undefs allowed for "
1978 "SpvOpConstantComposite");
1979 /* to make it easier, just insert a NULL constant for now */
1980 elems[i] = vtn_null_constant(b, elem_val->type);
1981 }
1982 }
1983
1984 switch (val->type->base_type) {
1985 case vtn_base_type_vector: {
1986 assert(glsl_type_is_vector(val->type->type));
1987 for (unsigned i = 0; i < elem_count; i++)
1988 val->constant->values[i] = elems[i]->values[0];
1989 break;
1990 }
1991
1992 case vtn_base_type_matrix:
1993 case vtn_base_type_struct:
1994 case vtn_base_type_array:
1995 ralloc_steal(val->constant, elems);
1996 val->constant->num_elements = elem_count;
1997 val->constant->elements = elems;
1998 break;
1999
2000 default:
2001 vtn_fail("Result type of %s must be a composite type",
2002 spirv_op_to_string(opcode));
2003 }
2004 break;
2005 }
2006
2007 case SpvOpSpecConstantOp: {
2008 nir_const_value u32op = nir_const_value_for_uint(w[3], 32);
2009 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32op);
2010 SpvOp opcode = u32op.u32;
2011 switch (opcode) {
2012 case SpvOpVectorShuffle: {
2013 struct vtn_value *v0 = &b->values[w[4]];
2014 struct vtn_value *v1 = &b->values[w[5]];
2015
2016 vtn_assert(v0->value_type == vtn_value_type_constant ||
2017 v0->value_type == vtn_value_type_undef);
2018 vtn_assert(v1->value_type == vtn_value_type_constant ||
2019 v1->value_type == vtn_value_type_undef);
2020
2021 unsigned len0 = glsl_get_vector_elements(v0->type->type);
2022 unsigned len1 = glsl_get_vector_elements(v1->type->type);
2023
2024 vtn_assert(len0 + len1 < 16);
2025
2026 unsigned bit_size = glsl_get_bit_size(val->type->type);
2027 unsigned bit_size0 = glsl_get_bit_size(v0->type->type);
2028 unsigned bit_size1 = glsl_get_bit_size(v1->type->type);
2029
2030 vtn_assert(bit_size == bit_size0 && bit_size == bit_size1);
2031 (void)bit_size0; (void)bit_size1;
2032
2033 nir_const_value undef = { .u64 = 0xdeadbeefdeadbeef };
2034 nir_const_value combined[NIR_MAX_VEC_COMPONENTS * 2];
2035
2036 if (v0->value_type == vtn_value_type_constant) {
2037 for (unsigned i = 0; i < len0; i++)
2038 combined[i] = v0->constant->values[i];
2039 }
2040 if (v1->value_type == vtn_value_type_constant) {
2041 for (unsigned i = 0; i < len1; i++)
2042 combined[len0 + i] = v1->constant->values[i];
2043 }
2044
2045 for (unsigned i = 0, j = 0; i < count - 6; i++, j++) {
2046 uint32_t comp = w[i + 6];
2047 if (comp == (uint32_t)-1) {
2048 /* If component is not used, set the value to a known constant
2049 * to detect if it is wrongly used.
2050 */
2051 val->constant->values[j] = undef;
2052 } else {
2053 vtn_fail_if(comp >= len0 + len1,
2054 "All Component literals must either be FFFFFFFF "
2055 "or in [0, N - 1] (inclusive).");
2056 val->constant->values[j] = combined[comp];
2057 }
2058 }
2059 break;
2060 }
2061
2062 case SpvOpCompositeExtract:
2063 case SpvOpCompositeInsert: {
2064 struct vtn_value *comp;
2065 unsigned deref_start;
2066 struct nir_constant **c;
2067 if (opcode == SpvOpCompositeExtract) {
2068 comp = vtn_value(b, w[4], vtn_value_type_constant);
2069 deref_start = 5;
2070 c = &comp->constant;
2071 } else {
2072 comp = vtn_value(b, w[5], vtn_value_type_constant);
2073 deref_start = 6;
2074 val->constant = nir_constant_clone(comp->constant,
2075 (nir_variable *)b);
2076 c = &val->constant;
2077 }
2078
2079 int elem = -1;
2080 const struct vtn_type *type = comp->type;
2081 for (unsigned i = deref_start; i < count; i++) {
2082 vtn_fail_if(w[i] > type->length,
2083 "%uth index of %s is %u but the type has only "
2084 "%u elements", i - deref_start,
2085 spirv_op_to_string(opcode), w[i], type->length);
2086
2087 switch (type->base_type) {
2088 case vtn_base_type_vector:
2089 elem = w[i];
2090 type = type->array_element;
2091 break;
2092
2093 case vtn_base_type_matrix:
2094 case vtn_base_type_array:
2095 c = &(*c)->elements[w[i]];
2096 type = type->array_element;
2097 break;
2098
2099 case vtn_base_type_struct:
2100 c = &(*c)->elements[w[i]];
2101 type = type->members[w[i]];
2102 break;
2103
2104 default:
2105 vtn_fail("%s must only index into composite types",
2106 spirv_op_to_string(opcode));
2107 }
2108 }
2109
2110 if (opcode == SpvOpCompositeExtract) {
2111 if (elem == -1) {
2112 val->constant = *c;
2113 } else {
2114 unsigned num_components = type->length;
2115 for (unsigned i = 0; i < num_components; i++)
2116 val->constant->values[i] = (*c)->values[elem + i];
2117 }
2118 } else {
2119 struct vtn_value *insert =
2120 vtn_value(b, w[4], vtn_value_type_constant);
2121 vtn_assert(insert->type == type);
2122 if (elem == -1) {
2123 *c = insert->constant;
2124 } else {
2125 unsigned num_components = type->length;
2126 for (unsigned i = 0; i < num_components; i++)
2127 (*c)->values[elem + i] = insert->constant->values[i];
2128 }
2129 }
2130 break;
2131 }
2132
2133 default: {
2134 bool swap;
2135 nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(val->type->type);
2136 nir_alu_type src_alu_type = dst_alu_type;
2137 unsigned num_components = glsl_get_vector_elements(val->type->type);
2138 unsigned bit_size;
2139
2140 vtn_assert(count <= 7);
2141
2142 switch (opcode) {
2143 case SpvOpSConvert:
2144 case SpvOpFConvert:
2145 case SpvOpUConvert:
2146 /* We have a source in a conversion */
2147 src_alu_type =
2148 nir_get_nir_type_for_glsl_type(vtn_get_value_type(b, w[4])->type);
2149 /* We use the bitsize of the conversion source to evaluate the opcode later */
2150 bit_size = glsl_get_bit_size(vtn_get_value_type(b, w[4])->type);
2151 break;
2152 default:
2153 bit_size = glsl_get_bit_size(val->type->type);
2154 };
2155
2156 bool exact;
2157 nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact,
2158 nir_alu_type_get_type_size(src_alu_type),
2159 nir_alu_type_get_type_size(dst_alu_type));
2160
2161 /* No SPIR-V opcodes handled through this path should set exact.
2162 * Since it is ignored, assert on it.
2163 */
2164 assert(!exact);
2165
2166 nir_const_value src[3][NIR_MAX_VEC_COMPONENTS];
2167
2168 for (unsigned i = 0; i < count - 4; i++) {
2169 struct vtn_value *src_val =
2170 vtn_value(b, w[4 + i], vtn_value_type_constant);
2171
2172 /* If this is an unsized source, pull the bit size from the
2173 * source; otherwise, we'll use the bit size from the destination.
2174 */
2175 if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]))
2176 bit_size = glsl_get_bit_size(src_val->type->type);
2177
2178 unsigned src_comps = nir_op_infos[op].input_sizes[i] ?
2179 nir_op_infos[op].input_sizes[i] :
2180 num_components;
2181
2182 unsigned j = swap ? 1 - i : i;
2183 for (unsigned c = 0; c < src_comps; c++)
2184 src[j][c] = src_val->constant->values[c];
2185 }
2186
2187 /* fix up fixed size sources */
2188 switch (op) {
2189 case nir_op_ishl:
2190 case nir_op_ishr:
2191 case nir_op_ushr: {
2192 if (bit_size == 32)
2193 break;
2194 for (unsigned i = 0; i < num_components; ++i) {
2195 switch (bit_size) {
2196 case 64: src[1][i].u32 = src[1][i].u64; break;
2197 case 16: src[1][i].u32 = src[1][i].u16; break;
2198 case 8: src[1][i].u32 = src[1][i].u8; break;
2199 }
2200 }
2201 break;
2202 }
2203 default:
2204 break;
2205 }
2206
2207 nir_const_value *srcs[3] = {
2208 src[0], src[1], src[2],
2209 };
2210 nir_eval_const_opcode(op, val->constant->values,
2211 num_components, bit_size, srcs,
2212 b->shader->info.float_controls_execution_mode);
2213 break;
2214 } /* default */
2215 }
2216 break;
2217 }
2218
2219 case SpvOpConstantNull:
2220 val->constant = vtn_null_constant(b, val->type);
2221 val->is_null_constant = true;
2222 break;
2223
2224 default:
2225 vtn_fail_with_opcode("Unhandled opcode", opcode);
2226 }
2227
2228 /* Now that we have the value, update the workgroup size if needed */
2229 if (gl_shader_stage_uses_workgroup(b->entry_point_stage))
2230 vtn_foreach_decoration(b, val, handle_workgroup_size_decoration_cb,
2231 NULL);
2232 }
2233
2234 static void
2235 vtn_split_barrier_semantics(struct vtn_builder *b,
2236 SpvMemorySemanticsMask semantics,
2237 SpvMemorySemanticsMask *before,
2238 SpvMemorySemanticsMask *after)
2239 {
2240 /* For memory semantics embedded in operations, we split them into up to
2241 * two barriers, to be added before and after the operation. This is less
2242 * strict than if we propagated them until the final backend stage, but still
2243 * results in correct execution.
2244 *
2245 * A further improvement would be to pipe this information (and use it!) into the
2246 * next compiler layers, at the expense of making the handling of barriers
2247 * more complicated.
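    *
    * Illustrative example (not from the spec): an atomic carrying
    * AcquireRelease | WorkgroupMemory semantics ends up with a Release
    * barrier on workgroup memory emitted before the operation and an
    * Acquire barrier on workgroup memory emitted after it.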
2248 */
2249
2250 *before = SpvMemorySemanticsMaskNone;
2251 *after = SpvMemorySemanticsMaskNone;
2252
2253 SpvMemorySemanticsMask order_semantics =
2254 semantics & (SpvMemorySemanticsAcquireMask |
2255 SpvMemorySemanticsReleaseMask |
2256 SpvMemorySemanticsAcquireReleaseMask |
2257 SpvMemorySemanticsSequentiallyConsistentMask);
2258
2259 if (util_bitcount(order_semantics) > 1) {
2260 /* Old GLSLang versions incorrectly set all the ordering bits. This was
2261 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2262 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2263 */
2264 vtn_warn("Multiple memory ordering semantics specified, "
2265 "assuming AcquireRelease.");
2266 order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2267 }
2268
2269 const SpvMemorySemanticsMask av_vis_semantics =
2270 semantics & (SpvMemorySemanticsMakeAvailableMask |
2271 SpvMemorySemanticsMakeVisibleMask);
2272
2273 const SpvMemorySemanticsMask storage_semantics =
2274 semantics & (SpvMemorySemanticsUniformMemoryMask |
2275 SpvMemorySemanticsSubgroupMemoryMask |
2276 SpvMemorySemanticsWorkgroupMemoryMask |
2277 SpvMemorySemanticsCrossWorkgroupMemoryMask |
2278 SpvMemorySemanticsAtomicCounterMemoryMask |
2279 SpvMemorySemanticsImageMemoryMask |
2280 SpvMemorySemanticsOutputMemoryMask);
2281
2282 const SpvMemorySemanticsMask other_semantics =
2283 semantics & ~(order_semantics | av_vis_semantics | storage_semantics |
2284 SpvMemorySemanticsVolatileMask);
2285
2286 if (other_semantics)
2287 vtn_warn("Ignoring unhandled memory semantics: %u\n", other_semantics);
2288
2289 /* SequentiallyConsistent is treated as AcquireRelease. */
2290
2291 /* The RELEASE barrier happens BEFORE the operation, and it is usually
2292 * associated with a Store. All the write operations with matching
2293 * semantics will not be reordered after the Store.
2294 */
2295 if (order_semantics & (SpvMemorySemanticsReleaseMask |
2296 SpvMemorySemanticsAcquireReleaseMask |
2297 SpvMemorySemanticsSequentiallyConsistentMask)) {
2298 *before |= SpvMemorySemanticsReleaseMask | storage_semantics;
2299 }
2300
2301 /* The ACQUIRE barrier happens AFTER the operation, and it is usually
2302 * associated with a Load. All the operations with matching semantics
2303 * will not be reordered before the Load.
2304 */
2305 if (order_semantics & (SpvMemorySemanticsAcquireMask |
2306 SpvMemorySemanticsAcquireReleaseMask |
2307 SpvMemorySemanticsSequentiallyConsistentMask)) {
2308 *after |= SpvMemorySemanticsAcquireMask | storage_semantics;
2309 }
2310
2311 if (av_vis_semantics & SpvMemorySemanticsMakeVisibleMask)
2312 *before |= SpvMemorySemanticsMakeVisibleMask | storage_semantics;
2313
2314 if (av_vis_semantics & SpvMemorySemanticsMakeAvailableMask)
2315 *after |= SpvMemorySemanticsMakeAvailableMask | storage_semantics;
2316 }
2317
2318 static nir_memory_semantics
2319 vtn_mem_semantics_to_nir_mem_semantics(struct vtn_builder *b,
2320 SpvMemorySemanticsMask semantics)
2321 {
2322 nir_memory_semantics nir_semantics = 0;
2323
2324 SpvMemorySemanticsMask order_semantics =
2325 semantics & (SpvMemorySemanticsAcquireMask |
2326 SpvMemorySemanticsReleaseMask |
2327 SpvMemorySemanticsAcquireReleaseMask |
2328 SpvMemorySemanticsSequentiallyConsistentMask);
2329
2330 if (util_bitcount(order_semantics) > 1) {
2331 /* Old GLSLang versions incorrectly set all the ordering bits. This was
2332 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2333 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2334 */
2335 vtn_warn("Multiple memory ordering semantics bits specified, "
2336 "assuming AcquireRelease.");
2337 order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2338 }
2339
2340 switch (order_semantics) {
2341 case 0:
2342 /* Not an ordering barrier. */
2343 break;
2344
2345 case SpvMemorySemanticsAcquireMask:
2346 nir_semantics = NIR_MEMORY_ACQUIRE;
2347 break;
2348
2349 case SpvMemorySemanticsReleaseMask:
2350 nir_semantics = NIR_MEMORY_RELEASE;
2351 break;
2352
2353 case SpvMemorySemanticsSequentiallyConsistentMask:
2354 FALLTHROUGH; /* Treated as AcquireRelease in Vulkan. */
2355 case SpvMemorySemanticsAcquireReleaseMask:
2356 nir_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE;
2357 break;
2358
2359 default:
2360 unreachable("Invalid memory order semantics");
2361 }
2362
2363 if (semantics & SpvMemorySemanticsMakeAvailableMask) {
2364 vtn_fail_if(!b->options->caps.vk_memory_model,
2365 "To use MakeAvailable memory semantics the VulkanMemoryModel "
2366 "capability must be declared.");
2367 nir_semantics |= NIR_MEMORY_MAKE_AVAILABLE;
2368 }
2369
2370 if (semantics & SpvMemorySemanticsMakeVisibleMask) {
2371 vtn_fail_if(!b->options->caps.vk_memory_model,
2372 "To use MakeVisible memory semantics the VulkanMemoryModel "
2373 "capability must be declared.");
2374 nir_semantics |= NIR_MEMORY_MAKE_VISIBLE;
2375 }
2376
2377 return nir_semantics;
2378 }
2379
2380 static nir_variable_mode
2381 vtn_mem_semantics_to_nir_var_modes(struct vtn_builder *b,
2382 SpvMemorySemanticsMask semantics)
2383 {
2384 /* Vulkan Environment for SPIR-V says "SubgroupMemory, CrossWorkgroupMemory,
2385 * and AtomicCounterMemory are ignored".
2386 */
2387 if (b->options->environment == NIR_SPIRV_VULKAN) {
2388 semantics &= ~(SpvMemorySemanticsSubgroupMemoryMask |
2389 SpvMemorySemanticsCrossWorkgroupMemoryMask |
2390 SpvMemorySemanticsAtomicCounterMemoryMask);
2391 }
2392
2393 /* TODO: Consider adding nir_var_mem_image mode to NIR so it can be used
2394 * for SpvMemorySemanticsImageMemoryMask.
2395 */
2396
2397 nir_variable_mode modes = 0;
2398 if (semantics & (SpvMemorySemanticsUniformMemoryMask |
2399 SpvMemorySemanticsImageMemoryMask)) {
2400 modes |= nir_var_uniform |
2401 nir_var_mem_ubo |
2402 nir_var_mem_ssbo |
2403 nir_var_mem_global;
2404 }
2405 if (semantics & SpvMemorySemanticsWorkgroupMemoryMask)
2406 modes |= nir_var_mem_shared;
2407 if (semantics & SpvMemorySemanticsCrossWorkgroupMemoryMask)
2408 modes |= nir_var_mem_global;
2409 if (semantics & SpvMemorySemanticsOutputMemoryMask) {
2410 modes |= nir_var_shader_out;
2411 }
2412
2413 return modes;
2414 }
2415
2416 static nir_scope
2417 vtn_scope_to_nir_scope(struct vtn_builder *b, SpvScope scope)
2418 {
2419 nir_scope nir_scope;
2420 switch (scope) {
2421 case SpvScopeDevice:
2422 vtn_fail_if(b->options->caps.vk_memory_model &&
2423 !b->options->caps.vk_memory_model_device_scope,
2424 "If the Vulkan memory model is declared and any instruction "
2425 "uses Device scope, the VulkanMemoryModelDeviceScope "
2426 "capability must be declared.");
2427 nir_scope = NIR_SCOPE_DEVICE;
2428 break;
2429
2430 case SpvScopeQueueFamily:
2431 vtn_fail_if(!b->options->caps.vk_memory_model,
2432 "To use Queue Family scope, the VulkanMemoryModel capability "
2433 "must be declared.");
2434 nir_scope = NIR_SCOPE_QUEUE_FAMILY;
2435 break;
2436
2437 case SpvScopeWorkgroup:
2438 nir_scope = NIR_SCOPE_WORKGROUP;
2439 break;
2440
2441 case SpvScopeSubgroup:
2442 nir_scope = NIR_SCOPE_SUBGROUP;
2443 break;
2444
2445 case SpvScopeInvocation:
2446 nir_scope = NIR_SCOPE_INVOCATION;
2447 break;
2448
2449 case SpvScopeShaderCallKHR:
2450 nir_scope = NIR_SCOPE_SHADER_CALL;
2451 break;
2452
2453 default:
2454 vtn_fail("Invalid memory scope");
2455 }
2456
2457 return nir_scope;
2458 }
2459
2460 static void
2461 vtn_emit_scoped_control_barrier(struct vtn_builder *b, SpvScope exec_scope,
2462 SpvScope mem_scope,
2463 SpvMemorySemanticsMask semantics)
2464 {
2465 nir_memory_semantics nir_semantics =
2466 vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2467 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2468 nir_scope nir_exec_scope = vtn_scope_to_nir_scope(b, exec_scope);
2469
2470 /* Memory semantics is optional for OpControlBarrier. */
2471 nir_scope nir_mem_scope;
2472 if (nir_semantics == 0 || modes == 0)
2473 nir_mem_scope = NIR_SCOPE_NONE;
2474 else
2475 nir_mem_scope = vtn_scope_to_nir_scope(b, mem_scope);
2476
2477 nir_scoped_barrier(&b->nb, .execution_scope=nir_exec_scope, .memory_scope=nir_mem_scope,
2478 .memory_semantics=nir_semantics, .memory_modes=modes);
2479 }
2480
2481 static void
2482 vtn_emit_scoped_memory_barrier(struct vtn_builder *b, SpvScope scope,
2483 SpvMemorySemanticsMask semantics)
2484 {
2485 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2486 nir_memory_semantics nir_semantics =
2487 vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2488
2489 /* No barrier to add. */
2490 if (nir_semantics == 0 || modes == 0)
2491 return;
2492
2493 nir_scoped_barrier(&b->nb, .memory_scope=vtn_scope_to_nir_scope(b, scope),
2494 .memory_semantics=nir_semantics,
2495 .memory_modes=modes);
2496 }
2497
2498 struct vtn_ssa_value *
2499 vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
2500 {
2501 /* Always use bare types for SSA values for a couple of reasons:
2502 *
2503 * 1. Code which emits deref chains should never listen to the explicit
2504 * layout information on the SSA value if any exists. If we've
2505 * accidentally been relying on this, we want to find those bugs.
2506 *
2507 * 2. We want to be able to quickly check that an SSA value being assigned
2508 * to a SPIR-V value has the right type. Using bare types everywhere
2509 * ensures that we can pointer-compare.
2510 */
2511 struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
2512 val->type = glsl_get_bare_type(type);
2513
2514
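   /* Descriptive note: for aggregate types the SSA value is built
    * recursively below, one child vtn_ssa_value per array element, matrix
    * column, or struct member.
    */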
2515 if (!glsl_type_is_vector_or_scalar(type)) {
2516 unsigned elems = glsl_get_length(val->type);
2517 val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
2518 if (glsl_type_is_array_or_matrix(type)) {
2519 const struct glsl_type *elem_type = glsl_get_array_element(type);
2520 for (unsigned i = 0; i < elems; i++)
2521 val->elems[i] = vtn_create_ssa_value(b, elem_type);
2522 } else {
2523 vtn_assert(glsl_type_is_struct_or_ifc(type));
2524 for (unsigned i = 0; i < elems; i++) {
2525 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
2526 val->elems[i] = vtn_create_ssa_value(b, elem_type);
2527 }
2528 }
2529 }
2530
2531 return val;
2532 }
2533
2534 static nir_tex_src
2535 vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
2536 {
2537 nir_tex_src src;
2538 src.src = nir_src_for_ssa(vtn_get_nir_ssa(b, index));
2539 src.src_type = type;
2540 return src;
2541 }
2542
2543 static uint32_t
2544 image_operand_arg(struct vtn_builder *b, const uint32_t *w, uint32_t count,
2545 uint32_t mask_idx, SpvImageOperandsMask op)
2546 {
2547 static const SpvImageOperandsMask ops_with_arg =
2548 SpvImageOperandsBiasMask |
2549 SpvImageOperandsLodMask |
2550 SpvImageOperandsGradMask |
2551 SpvImageOperandsConstOffsetMask |
2552 SpvImageOperandsOffsetMask |
2553 SpvImageOperandsConstOffsetsMask |
2554 SpvImageOperandsSampleMask |
2555 SpvImageOperandsMinLodMask |
2556 SpvImageOperandsMakeTexelAvailableMask |
2557 SpvImageOperandsMakeTexelVisibleMask;
2558
2559 assert(util_bitcount(op) == 1);
2560 assert(w[mask_idx] & op);
2561 assert(op & ops_with_arg);
2562
2563 uint32_t idx = util_bitcount(w[mask_idx] & (op - 1) & ops_with_arg) + 1;
2564
2565 /* Adjust indices for operands with two arguments. */
2566 static const SpvImageOperandsMask ops_with_two_args =
2567 SpvImageOperandsGradMask;
2568 idx += util_bitcount(w[mask_idx] & (op - 1) & ops_with_two_args);
2569
2570 idx += mask_idx;
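   /* Illustrative example (not in the original source): if w[mask_idx] is
    * Bias | ConstOffset and op is ConstOffset, then one argument-bearing
    * operand (Bias) precedes it, so idx ends up as mask_idx + 2 and w[idx]
    * is the ConstOffset argument.
    */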
2571
2572 vtn_fail_if(idx + (op & ops_with_two_args ? 1 : 0) >= count,
2573 "Image op claims to have %s but does not have enough "
2574 "following operands", spirv_imageoperands_to_string(op));
2575
2576 return idx;
2577 }
2578
2579 static void
2580 non_uniform_decoration_cb(struct vtn_builder *b,
2581 struct vtn_value *val, int member,
2582 const struct vtn_decoration *dec, void *void_ctx)
2583 {
2584 enum gl_access_qualifier *access = void_ctx;
2585 switch (dec->decoration) {
2586 case SpvDecorationNonUniformEXT:
2587 *access |= ACCESS_NON_UNIFORM;
2588 break;
2589
2590 default:
2591 break;
2592 }
2593 }
2594
2595 /* Apply SignExtend/ZeroExtend operands to get the actual result type for
2596 * image read/sample operations and source type for write operations.
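 *
 * Illustrative example (not in the original comment): if the texel type
 * maps to nir_type_uint32 and the SignExtend operand is present, the
 * effective type returned is nir_type_int32.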
2597 */
2598 static nir_alu_type
2599 get_image_type(struct vtn_builder *b, nir_alu_type type, unsigned operands)
2600 {
2601 unsigned extend_operands =
2602 operands & (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask);
2603 vtn_fail_if(nir_alu_type_get_base_type(type) == nir_type_float && extend_operands,
2604 "SignExtend/ZeroExtend used on floating-point texel type");
2605 vtn_fail_if(extend_operands ==
2606 (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask),
2607 "SignExtend and ZeroExtend both specified");
2608
2609 if (operands & SpvImageOperandsSignExtendMask)
2610 return nir_type_int | nir_alu_type_get_type_size(type);
2611 if (operands & SpvImageOperandsZeroExtendMask)
2612 return nir_type_uint | nir_alu_type_get_type_size(type);
2613
2614 return type;
2615 }
2616
2617 static void
2618 vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
2619 const uint32_t *w, unsigned count)
2620 {
2621 if (opcode == SpvOpSampledImage) {
2622 struct vtn_sampled_image si = {
2623 .image = vtn_get_image(b, w[3], NULL),
2624 .sampler = vtn_get_sampler(b, w[4]),
2625 };
2626
2627 enum gl_access_qualifier access = 0;
2628 vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
2629 non_uniform_decoration_cb, &access);
2630 vtn_foreach_decoration(b, vtn_untyped_value(b, w[4]),
2631 non_uniform_decoration_cb, &access);
2632
2633 vtn_push_sampled_image(b, w[2], si, access & ACCESS_NON_UNIFORM);
2634 return;
2635 } else if (opcode == SpvOpImage) {
2636 struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
2637
2638 enum gl_access_qualifier access = 0;
2639 vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
2640 non_uniform_decoration_cb, &access);
2641
2642 vtn_push_image(b, w[2], si.image, access & ACCESS_NON_UNIFORM);
2643 return;
2644 } else if (opcode == SpvOpImageSparseTexelsResident) {
2645 nir_ssa_def *code = vtn_get_nir_ssa(b, w[3]);
2646 vtn_push_nir_ssa(b, w[2], nir_is_sparse_texels_resident(&b->nb, code));
2647 return;
2648 }
2649
2650 nir_deref_instr *image = NULL, *sampler = NULL;
2651 struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
2652 if (sampled_val->type->base_type == vtn_base_type_sampled_image) {
2653 struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
2654 image = si.image;
2655 sampler = si.sampler;
2656 } else {
2657 image = vtn_get_image(b, w[3], NULL);
2658 }
2659
2660 const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image->type);
2661 const bool is_array = glsl_sampler_type_is_array(image->type);
2662 nir_alu_type dest_type = nir_type_invalid;
2663
2664 /* Figure out the base texture operation */
2665 nir_texop texop;
2666 switch (opcode) {
2667 case SpvOpImageSampleImplicitLod:
2668 case SpvOpImageSparseSampleImplicitLod:
2669 case SpvOpImageSampleDrefImplicitLod:
2670 case SpvOpImageSparseSampleDrefImplicitLod:
2671 case SpvOpImageSampleProjImplicitLod:
2672 case SpvOpImageSampleProjDrefImplicitLod:
2673 texop = nir_texop_tex;
2674 break;
2675
2676 case SpvOpImageSampleExplicitLod:
2677 case SpvOpImageSparseSampleExplicitLod:
2678 case SpvOpImageSampleDrefExplicitLod:
2679 case SpvOpImageSparseSampleDrefExplicitLod:
2680 case SpvOpImageSampleProjExplicitLod:
2681 case SpvOpImageSampleProjDrefExplicitLod:
2682 texop = nir_texop_txl;
2683 break;
2684
2685 case SpvOpImageFetch:
2686 case SpvOpImageSparseFetch:
2687 if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
2688 texop = nir_texop_txf_ms;
2689 } else {
2690 texop = nir_texop_txf;
2691 }
2692 break;
2693
2694 case SpvOpImageGather:
2695 case SpvOpImageSparseGather:
2696 case SpvOpImageDrefGather:
2697 case SpvOpImageSparseDrefGather:
2698 texop = nir_texop_tg4;
2699 break;
2700
2701 case SpvOpImageQuerySizeLod:
2702 case SpvOpImageQuerySize:
2703 texop = nir_texop_txs;
2704 dest_type = nir_type_int32;
2705 break;
2706
2707 case SpvOpImageQueryLod:
2708 texop = nir_texop_lod;
2709 dest_type = nir_type_float32;
2710 break;
2711
2712 case SpvOpImageQueryLevels:
2713 texop = nir_texop_query_levels;
2714 dest_type = nir_type_int32;
2715 break;
2716
2717 case SpvOpImageQuerySamples:
2718 texop = nir_texop_texture_samples;
2719 dest_type = nir_type_int32;
2720 break;
2721
2722 case SpvOpFragmentFetchAMD:
2723 texop = nir_texop_fragment_fetch_amd;
2724 break;
2725
2726 case SpvOpFragmentMaskFetchAMD:
2727 texop = nir_texop_fragment_mask_fetch_amd;
2728 dest_type = nir_type_uint32;
2729 break;
2730
2731 default:
2732 vtn_fail_with_opcode("Unhandled opcode", opcode);
2733 }
2734
2735 nir_tex_src srcs[10]; /* 10 should be enough */
2736 nir_tex_src *p = srcs;
2737
2738 p->src = nir_src_for_ssa(&image->dest.ssa);
2739 p->src_type = nir_tex_src_texture_deref;
2740 p++;
2741
2742 switch (texop) {
2743 case nir_texop_tex:
2744 case nir_texop_txb:
2745 case nir_texop_txl:
2746 case nir_texop_txd:
2747 case nir_texop_tg4:
2748 case nir_texop_lod:
2749 vtn_fail_if(sampler == NULL,
2750 "%s requires an image of type OpTypeSampledImage",
2751 spirv_op_to_string(opcode));
2752 p->src = nir_src_for_ssa(&sampler->dest.ssa);
2753 p->src_type = nir_tex_src_sampler_deref;
2754 p++;
2755 break;
2756 case nir_texop_txf:
2757 case nir_texop_txf_ms:
2758 case nir_texop_txs:
2759 case nir_texop_query_levels:
2760 case nir_texop_texture_samples:
2761 case nir_texop_samples_identical:
2762 case nir_texop_fragment_fetch_amd:
2763 case nir_texop_fragment_mask_fetch_amd:
2764 /* These don't take a sampler */
2765 break;
2766 case nir_texop_txf_ms_fb:
2767 vtn_fail("unexpected nir_texop_txf_ms_fb");
2768 break;
2769 case nir_texop_txf_ms_mcs_intel:
2770 vtn_fail("unexpected nir_texop_txf_ms_mcs_intel");
2771 case nir_texop_tex_prefetch:
2772 vtn_fail("unexpected nir_texop_tex_prefetch");
2773 }
2774
2775 unsigned idx = 4;
2776
2777 struct nir_ssa_def *coord;
2778 unsigned coord_components;
2779 switch (opcode) {
2780 case SpvOpImageSampleImplicitLod:
2781 case SpvOpImageSparseSampleImplicitLod:
2782 case SpvOpImageSampleExplicitLod:
2783 case SpvOpImageSparseSampleExplicitLod:
2784 case SpvOpImageSampleDrefImplicitLod:
2785 case SpvOpImageSparseSampleDrefImplicitLod:
2786 case SpvOpImageSampleDrefExplicitLod:
2787 case SpvOpImageSparseSampleDrefExplicitLod:
2788 case SpvOpImageSampleProjImplicitLod:
2789 case SpvOpImageSampleProjExplicitLod:
2790 case SpvOpImageSampleProjDrefImplicitLod:
2791 case SpvOpImageSampleProjDrefExplicitLod:
2792 case SpvOpImageFetch:
2793 case SpvOpImageSparseFetch:
2794 case SpvOpImageGather:
2795 case SpvOpImageSparseGather:
2796 case SpvOpImageDrefGather:
2797 case SpvOpImageSparseDrefGather:
2798 case SpvOpImageQueryLod:
2799 case SpvOpFragmentFetchAMD:
2800 case SpvOpFragmentMaskFetchAMD: {
2801 /* All these types have the coordinate as their first real argument */
2802 coord_components = glsl_get_sampler_dim_coordinate_components(sampler_dim);
2803
2804 if (is_array && texop != nir_texop_lod)
2805 coord_components++;
2806
2807 struct vtn_ssa_value *coord_val = vtn_ssa_value(b, w[idx++]);
2808 coord = coord_val->def;
2809 /* From the SPIR-V spec version 1.5, rev. 5:
2810 *
2811 * "Coordinate must be a scalar or vector of floating-point type. It
2812 * contains (u[, v] ... [, array layer]) as needed by the definition
2813 * of Sampled Image. It may be a vector larger than needed, but all
2814 * unused components appear after all used components."
2815 */
2816 vtn_fail_if(coord->num_components < coord_components,
2817 "Coordinate value passed has fewer components than sampler dimensionality.");
2818 p->src = nir_src_for_ssa(nir_channels(&b->nb, coord,
2819 (1 << coord_components) - 1));
2820
2821 /* OpenCL allows integer sampling coordinates */
2822 if (glsl_type_is_integer(coord_val->type) &&
2823 opcode == SpvOpImageSampleExplicitLod) {
2824 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
2825 "Unless the Kernel capability is being used, the coordinate parameter "
2826 "of OpImageSampleExplicitLod must be floating point.");
2827
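         /* Descriptive note: integer coordinates address texels directly, so
          * after converting to float each component except the array layer is
          * nudged by 0.5 so the sample lands on the texel center.
          */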
2828 nir_ssa_def *coords[4];
2829 nir_ssa_def *f0_5 = nir_imm_float(&b->nb, 0.5);
2830 for (unsigned i = 0; i < coord_components; i++) {
2831 coords[i] = nir_i2f32(&b->nb, nir_channel(&b->nb, p->src.ssa, i));
2832
2833 if (!is_array || i != coord_components - 1)
2834 coords[i] = nir_fadd(&b->nb, coords[i], f0_5);
2835 }
2836
2837 p->src = nir_src_for_ssa(nir_vec(&b->nb, coords, coord_components));
2838 }
2839
2840 p->src_type = nir_tex_src_coord;
2841 p++;
2842 break;
2843 }
2844
2845 default:
2846 coord = NULL;
2847 coord_components = 0;
2848 break;
2849 }
2850
2851 switch (opcode) {
2852 case SpvOpImageSampleProjImplicitLod:
2853 case SpvOpImageSampleProjExplicitLod:
2854 case SpvOpImageSampleProjDrefImplicitLod:
2855 case SpvOpImageSampleProjDrefExplicitLod:
2856 /* These have the projector as the last coordinate component */
2857 p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
2858 p->src_type = nir_tex_src_projector;
2859 p++;
2860 break;
2861
2862 default:
2863 break;
2864 }
2865
2866 bool is_shadow = false;
2867 unsigned gather_component = 0;
2868 switch (opcode) {
2869 case SpvOpImageSampleDrefImplicitLod:
2870 case SpvOpImageSparseSampleDrefImplicitLod:
2871 case SpvOpImageSampleDrefExplicitLod:
2872 case SpvOpImageSparseSampleDrefExplicitLod:
2873 case SpvOpImageSampleProjDrefImplicitLod:
2874 case SpvOpImageSampleProjDrefExplicitLod:
2875 case SpvOpImageDrefGather:
2876 case SpvOpImageSparseDrefGather:
2877 /* These all have an explicit depth value as their next source */
2878 is_shadow = true;
2879 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
2880 break;
2881
2882 case SpvOpImageGather:
2883 case SpvOpImageSparseGather:
2884 /* This has a component as its next source */
2885 gather_component = vtn_constant_uint(b, w[idx++]);
2886 break;
2887
2888 default:
2889 break;
2890 }
2891
2892 bool is_sparse = false;
2893 switch (opcode) {
2894 case SpvOpImageSparseSampleImplicitLod:
2895 case SpvOpImageSparseSampleExplicitLod:
2896 case SpvOpImageSparseSampleDrefImplicitLod:
2897 case SpvOpImageSparseSampleDrefExplicitLod:
2898 case SpvOpImageSparseFetch:
2899 case SpvOpImageSparseGather:
2900 case SpvOpImageSparseDrefGather:
2901 is_sparse = true;
2902 break;
2903 default:
2904 break;
2905 }
2906
2907 /* For OpImageQuerySizeLod, we always have an LOD */
2908 if (opcode == SpvOpImageQuerySizeLod)
2909 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
2910
2911 /* For OpFragmentFetchAMD, we always have a multisample index */
2912 if (opcode == SpvOpFragmentFetchAMD)
2913 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
2914
2915 /* Now we need to handle some number of optional arguments */
2916 struct vtn_value *gather_offsets = NULL;
2917 uint32_t operands = SpvImageOperandsMaskNone;
2918 if (idx < count) {
2919 operands = w[idx];
2920
2921 if (operands & SpvImageOperandsBiasMask) {
2922 vtn_assert(texop == nir_texop_tex ||
2923 texop == nir_texop_tg4);
2924 if (texop == nir_texop_tex)
2925 texop = nir_texop_txb;
2926 uint32_t arg = image_operand_arg(b, w, count, idx,
2927 SpvImageOperandsBiasMask);
2928 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_bias);
2929 }
2930
2931 if (operands & SpvImageOperandsLodMask) {
2932 vtn_assert(texop == nir_texop_txl || texop == nir_texop_txf ||
2933 texop == nir_texop_txs || texop == nir_texop_tg4);
2934 uint32_t arg = image_operand_arg(b, w, count, idx,
2935 SpvImageOperandsLodMask);
2936 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_lod);
2937 }
2938
2939 if (operands & SpvImageOperandsGradMask) {
2940 vtn_assert(texop == nir_texop_txl);
2941 texop = nir_texop_txd;
2942 uint32_t arg = image_operand_arg(b, w, count, idx,
2943 SpvImageOperandsGradMask);
2944 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ddx);
2945 (*p++) = vtn_tex_src(b, w[arg + 1], nir_tex_src_ddy);
2946 }
2947
2948 vtn_fail_if(util_bitcount(operands & (SpvImageOperandsConstOffsetsMask |
2949 SpvImageOperandsOffsetMask |
2950 SpvImageOperandsConstOffsetMask)) > 1,
2951 "At most one of the ConstOffset, Offset, and ConstOffsets "
2952 "image operands can be used on a given instruction.");
2953
2954 if (operands & SpvImageOperandsOffsetMask) {
2955 uint32_t arg = image_operand_arg(b, w, count, idx,
2956 SpvImageOperandsOffsetMask);
2957 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
2958 }
2959
2960 if (operands & SpvImageOperandsConstOffsetMask) {
2961 uint32_t arg = image_operand_arg(b, w, count, idx,
2962 SpvImageOperandsConstOffsetMask);
2963 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
2964 }
2965
2966 if (operands & SpvImageOperandsConstOffsetsMask) {
2967 vtn_assert(texop == nir_texop_tg4);
2968 uint32_t arg = image_operand_arg(b, w, count, idx,
2969 SpvImageOperandsConstOffsetsMask);
2970 gather_offsets = vtn_value(b, w[arg], vtn_value_type_constant);
2971 }
2972
2973 if (operands & SpvImageOperandsSampleMask) {
2974 vtn_assert(texop == nir_texop_txf_ms);
2975 uint32_t arg = image_operand_arg(b, w, count, idx,
2976 SpvImageOperandsSampleMask);
2977 texop = nir_texop_txf_ms;
2978 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ms_index);
2979 }
2980
2981 if (operands & SpvImageOperandsMinLodMask) {
2982 vtn_assert(texop == nir_texop_tex ||
2983 texop == nir_texop_txb ||
2984 texop == nir_texop_txd);
2985 uint32_t arg = image_operand_arg(b, w, count, idx,
2986 SpvImageOperandsMinLodMask);
2987 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_min_lod);
2988 }
2989 }
2990
2991 struct vtn_type *ret_type = vtn_get_type(b, w[1]);
2992 struct vtn_type *struct_type = NULL;
2993 if (is_sparse) {
2994 vtn_assert(glsl_type_is_struct_or_ifc(ret_type->type));
2995 struct_type = ret_type;
2996 ret_type = struct_type->members[1];
2997 }
2998
2999 nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
3000 instr->op = texop;
3001
3002 memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
3003
3004 instr->coord_components = coord_components;
3005 instr->sampler_dim = sampler_dim;
3006 instr->is_array = is_array;
3007 instr->is_shadow = is_shadow;
3008 instr->is_sparse = is_sparse;
3009 instr->is_new_style_shadow =
3010 is_shadow && glsl_get_components(ret_type->type) == 1;
3011 instr->component = gather_component;
3012
3013 /* The Vulkan spec says:
3014 *
3015 * "If an instruction loads from or stores to a resource (including
3016 * atomics and image instructions) and the resource descriptor being
3017 * accessed is not dynamically uniform, then the operand corresponding
3018 * to that resource (e.g. the pointer or sampled image operand) must be
3019 * decorated with NonUniform."
3020 *
3021 * It's very careful to specify that the exact operand must be decorated
3022 * NonUniform. The SPIR-V parser is not expected to chase through long
3023 * chains to find the NonUniform decoration. It's either right there or we
3024 * can assume it doesn't exist.
3025 */
3026 enum gl_access_qualifier access = 0;
3027 vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access);
3028
3029 if (sampled_val->propagated_non_uniform)
3030 access |= ACCESS_NON_UNIFORM;
3031
3032 if (image && (access & ACCESS_NON_UNIFORM))
3033 instr->texture_non_uniform = true;
3034
3035 if (sampler && (access & ACCESS_NON_UNIFORM))
3036 instr->sampler_non_uniform = true;
3037
3038 /* for non-query ops, get dest_type from SPIR-V return type */
3039 if (dest_type == nir_type_invalid) {
3040 /* the return type should match the image type, unless the image type is
3041 * VOID (CL image), in which case the return type dictates the sampler
3042 */
3043 enum glsl_base_type sampler_base =
3044 glsl_get_sampler_result_type(image->type);
3045 enum glsl_base_type ret_base = glsl_get_base_type(ret_type->type);
3046 vtn_fail_if(sampler_base != ret_base && sampler_base != GLSL_TYPE_VOID,
3047 "SPIR-V return type mismatches image type. This is only valid "
3048 "for untyped images (OpenCL).");
3049 dest_type = nir_get_nir_type_for_glsl_base_type(ret_base);
3050 dest_type = get_image_type(b, dest_type, operands);
3051 }
3052
3053 instr->dest_type = dest_type;
3054
3055 nir_ssa_dest_init(&instr->instr, &instr->dest,
3056 nir_tex_instr_dest_size(instr), 32, NULL);
3057
3058 vtn_assert(glsl_get_vector_elements(ret_type->type) ==
3059 nir_tex_instr_result_size(instr));
3060
3061 if (gather_offsets) {
3062 vtn_fail_if(gather_offsets->type->base_type != vtn_base_type_array ||
3063 gather_offsets->type->length != 4,
3064 "ConstOffsets must be an array of size four of vectors "
3065 "of two integer components");
3066
3067 struct vtn_type *vec_type = gather_offsets->type->array_element;
3068 vtn_fail_if(vec_type->base_type != vtn_base_type_vector ||
3069 vec_type->length != 2 ||
3070 !glsl_type_is_integer(vec_type->type),
3071 "ConstOffsets must be an array of size four of vectors "
3072 "of two integer components");
3073
3074 unsigned bit_size = glsl_get_bit_size(vec_type->type);
3075 for (uint32_t i = 0; i < 4; i++) {
3076 const nir_const_value *cvec =
3077 gather_offsets->constant->elements[i]->values;
3078 for (uint32_t j = 0; j < 2; j++) {
3079 switch (bit_size) {
3080 case 8: instr->tg4_offsets[i][j] = cvec[j].i8; break;
3081 case 16: instr->tg4_offsets[i][j] = cvec[j].i16; break;
3082 case 32: instr->tg4_offsets[i][j] = cvec[j].i32; break;
3083 case 64: instr->tg4_offsets[i][j] = cvec[j].i64; break;
3084 default:
3085 vtn_fail("Unsupported bit size: %u", bit_size);
3086 }
3087 }
3088 }
3089 }
3090
3091 nir_builder_instr_insert(&b->nb, &instr->instr);
3092
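   /* Descriptive note: for the sparse variants, the SPIR-V result is a
    * struct of { residency code, texel }, while the NIR tex instruction
    * returns the texel in the first result_size channels with the residency
    * code appended as the last channel, so the two are unpacked here.
    */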
3093 if (is_sparse) {
3094 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3095 unsigned result_size = glsl_get_vector_elements(ret_type->type);
3096 dest->elems[0]->def = nir_channel(&b->nb, &instr->dest.ssa, result_size);
3097 dest->elems[1]->def = nir_channels(&b->nb, &instr->dest.ssa,
3098 BITFIELD_MASK(result_size));
3099 vtn_push_ssa_value(b, w[2], dest);
3100 } else {
3101 vtn_push_nir_ssa(b, w[2], &instr->dest.ssa);
3102 }
3103 }
3104
3105 static void
3106 fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
3107 const uint32_t *w, nir_src *src)
3108 {
3109 const struct glsl_type *type = vtn_get_type(b, w[1])->type;
3110 unsigned bit_size = glsl_get_bit_size(type);
3111
3112 switch (opcode) {
3113 case SpvOpAtomicIIncrement:
3114 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 1, bit_size));
3115 break;
3116
3117 case SpvOpAtomicIDecrement:
3118 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, bit_size));
3119 break;
3120
3121 case SpvOpAtomicISub:
3122 src[0] =
3123 nir_src_for_ssa(nir_ineg(&b->nb, vtn_get_nir_ssa(b, w[6])));
3124 break;
3125
3126 case SpvOpAtomicCompareExchange:
3127 case SpvOpAtomicCompareExchangeWeak:
3128 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[8]));
3129 src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[7]));
3130 break;
3131
3132 case SpvOpAtomicExchange:
3133 case SpvOpAtomicIAdd:
3134 case SpvOpAtomicSMin:
3135 case SpvOpAtomicUMin:
3136 case SpvOpAtomicSMax:
3137 case SpvOpAtomicUMax:
3138 case SpvOpAtomicAnd:
3139 case SpvOpAtomicOr:
3140 case SpvOpAtomicXor:
3141 case SpvOpAtomicFAddEXT:
3142 case SpvOpAtomicFMinEXT:
3143 case SpvOpAtomicFMaxEXT:
3144 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6]));
3145 break;
3146
3147 default:
3148 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3149 }
3150 }
3151
3152 static nir_ssa_def *
3153 get_image_coord(struct vtn_builder *b, uint32_t value)
3154 {
3155 nir_ssa_def *coord = vtn_get_nir_ssa(b, value);
3156 /* The image_load_store intrinsics assume a 4-dim coordinate */
3157 return nir_pad_vec4(&b->nb, coord);
3158 }
3159
3160 static void
3161 vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
3162 const uint32_t *w, unsigned count)
3163 {
3164 /* Just get this one out of the way */
3165 if (opcode == SpvOpImageTexelPointer) {
3166 struct vtn_value *val =
3167 vtn_push_value(b, w[2], vtn_value_type_image_pointer);
3168 val->image = ralloc(b, struct vtn_image_pointer);
3169
3170 val->image->image = vtn_nir_deref(b, w[3]);
3171 val->image->coord = get_image_coord(b, w[4]);
3172 val->image->sample = vtn_get_nir_ssa(b, w[5]);
3173 val->image->lod = nir_imm_int(&b->nb, 0);
3174 return;
3175 }
3176
3177 struct vtn_image_pointer image;
3178 SpvScope scope = SpvScopeInvocation;
3179 SpvMemorySemanticsMask semantics = 0;
3180 SpvImageOperandsMask operands = SpvImageOperandsMaskNone;
3181
3182 enum gl_access_qualifier access = 0;
3183
3184 struct vtn_value *res_val;
3185 switch (opcode) {
3186 case SpvOpAtomicExchange:
3187 case SpvOpAtomicCompareExchange:
3188 case SpvOpAtomicCompareExchangeWeak:
3189 case SpvOpAtomicIIncrement:
3190 case SpvOpAtomicIDecrement:
3191 case SpvOpAtomicIAdd:
3192 case SpvOpAtomicISub:
3193 case SpvOpAtomicLoad:
3194 case SpvOpAtomicSMin:
3195 case SpvOpAtomicUMin:
3196 case SpvOpAtomicSMax:
3197 case SpvOpAtomicUMax:
3198 case SpvOpAtomicAnd:
3199 case SpvOpAtomicOr:
3200 case SpvOpAtomicXor:
3201 case SpvOpAtomicFAddEXT:
3202 case SpvOpAtomicFMinEXT:
3203 case SpvOpAtomicFMaxEXT:
3204 res_val = vtn_value(b, w[3], vtn_value_type_image_pointer);
3205 image = *res_val->image;
3206 scope = vtn_constant_uint(b, w[4]);
3207 semantics = vtn_constant_uint(b, w[5]);
3208 access |= ACCESS_COHERENT;
3209 break;
3210
3211 case SpvOpAtomicStore:
3212 res_val = vtn_value(b, w[1], vtn_value_type_image_pointer);
3213 image = *res_val->image;
3214 scope = vtn_constant_uint(b, w[2]);
3215 semantics = vtn_constant_uint(b, w[3]);
3216 access |= ACCESS_COHERENT;
3217 break;
3218
3219 case SpvOpImageQuerySizeLod:
3220 res_val = vtn_untyped_value(b, w[3]);
3221 image.image = vtn_get_image(b, w[3], &access);
3222 image.coord = NULL;
3223 image.sample = NULL;
3224 image.lod = vtn_ssa_value(b, w[4])->def;
3225 break;
3226
3227 case SpvOpImageQuerySize:
3228 case SpvOpImageQuerySamples:
3229 res_val = vtn_untyped_value(b, w[3]);
3230 image.image = vtn_get_image(b, w[3], &access);
3231 image.coord = NULL;
3232 image.sample = NULL;
3233 image.lod = NULL;
3234 break;
3235
3236 case SpvOpImageQueryFormat:
3237 case SpvOpImageQueryOrder:
3238 res_val = vtn_untyped_value(b, w[3]);
3239 image.image = vtn_get_image(b, w[3], &access);
3240 image.coord = NULL;
3241 image.sample = NULL;
3242 image.lod = NULL;
3243 break;
3244
3245 case SpvOpImageRead:
3246 case SpvOpImageSparseRead: {
3247 res_val = vtn_untyped_value(b, w[3]);
3248 image.image = vtn_get_image(b, w[3], &access);
3249 image.coord = get_image_coord(b, w[4]);
3250
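/* The Image Operands word is optional; when present, its literal operands
 * follow it.
 */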
3251 operands = count > 5 ? w[5] : SpvImageOperandsMaskNone;
3252
3253 if (operands & SpvImageOperandsSampleMask) {
3254 uint32_t arg = image_operand_arg(b, w, count, 5,
3255 SpvImageOperandsSampleMask);
3256 image.sample = vtn_get_nir_ssa(b, w[arg]);
3257 } else {
3258 image.sample = nir_ssa_undef(&b->nb, 1, 32);
3259 }
3260
3261 if (operands & SpvImageOperandsMakeTexelVisibleMask) {
3262 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3263 "MakeTexelVisible requires NonPrivateTexel to also be set.");
3264 uint32_t arg = image_operand_arg(b, w, count, 5,
3265 SpvImageOperandsMakeTexelVisibleMask);
3266 semantics = SpvMemorySemanticsMakeVisibleMask;
3267 scope = vtn_constant_uint(b, w[arg]);
3268 }
3269
3270 if (operands & SpvImageOperandsLodMask) {
3271 uint32_t arg = image_operand_arg(b, w, count, 5,
3272 SpvImageOperandsLodMask);
3273 image.lod = vtn_get_nir_ssa(b, w[arg]);
3274 } else {
3275 image.lod = nir_imm_int(&b->nb, 0);
3276 }
3277
3278 if (operands & SpvImageOperandsVolatileTexelMask)
3279 access |= ACCESS_VOLATILE;
3280
3281 break;
3282 }
3283
3284 case SpvOpImageWrite: {
3285 res_val = vtn_untyped_value(b, w[1]);
3286 image.image = vtn_get_image(b, w[1], &access);
3287 image.coord = get_image_coord(b, w[2]);
3288
3289 /* texel = w[3] */
3290
3291 operands = count > 4 ? w[4] : SpvImageOperandsMaskNone;
3292
3293 if (operands & SpvImageOperandsSampleMask) {
3294 uint32_t arg = image_operand_arg(b, w, count, 4,
3295 SpvImageOperandsSampleMask);
3296 image.sample = vtn_get_nir_ssa(b, w[arg]);
3297 } else {
3298 image.sample = nir_ssa_undef(&b->nb, 1, 32);
3299 }
3300
3301 if (operands & SpvImageOperandsMakeTexelAvailableMask) {
3302 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3303 "MakeTexelAvailable requires NonPrivateTexel to also be set.");
3304 uint32_t arg = image_operand_arg(b, w, count, 4,
3305 SpvImageOperandsMakeTexelAvailableMask);
3306 semantics = SpvMemorySemanticsMakeAvailableMask;
3307 scope = vtn_constant_uint(b, w[arg]);
3308 }
3309
3310 if (operands & SpvImageOperandsLodMask) {
3311 uint32_t arg = image_operand_arg(b, w, count, 4,
3312 SpvImageOperandsLodMask);
3313 image.lod = vtn_get_nir_ssa(b, w[arg]);
3314 } else {
3315 image.lod = nir_imm_int(&b->nb, 0);
3316 }
3317
3318 if (operands & SpvImageOperandsVolatileTexelMask)
3319 access |= ACCESS_VOLATILE;
3320
3321 break;
3322 }
3323
3324 default:
3325 vtn_fail_with_opcode("Invalid image opcode", opcode);
3326 }
3327
3328 if (semantics & SpvMemorySemanticsVolatileMask)
3329 access |= ACCESS_VOLATILE;
3330
3331 nir_intrinsic_op op;
3332 switch (opcode) {
3333 #define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_deref_##N; break;
3334 OP(ImageQuerySize, size)
3335 OP(ImageQuerySizeLod, size)
3336 OP(ImageRead, load)
3337 OP(ImageSparseRead, sparse_load)
3338 OP(ImageWrite, store)
3339 OP(AtomicLoad, load)
3340 OP(AtomicStore, store)
3341 OP(AtomicExchange, atomic_exchange)
3342 OP(AtomicCompareExchange, atomic_comp_swap)
3343 OP(AtomicCompareExchangeWeak, atomic_comp_swap)
3344 OP(AtomicIIncrement, atomic_add)
3345 OP(AtomicIDecrement, atomic_add)
3346 OP(AtomicIAdd, atomic_add)
3347 OP(AtomicISub, atomic_add)
3348 OP(AtomicSMin, atomic_imin)
3349 OP(AtomicUMin, atomic_umin)
3350 OP(AtomicSMax, atomic_imax)
3351 OP(AtomicUMax, atomic_umax)
3352 OP(AtomicAnd, atomic_and)
3353 OP(AtomicOr, atomic_or)
3354 OP(AtomicXor, atomic_xor)
3355 OP(AtomicFAddEXT, atomic_fadd)
3356 OP(AtomicFMinEXT, atomic_fmin)
3357 OP(AtomicFMaxEXT, atomic_fmax)
3358 OP(ImageQueryFormat, format)
3359 OP(ImageQueryOrder, order)
3360 OP(ImageQuerySamples, samples)
3361 #undef OP
3362 default:
3363 vtn_fail_with_opcode("Invalid image opcode", opcode);
3364 }
3365
3366 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
3367
3368 intrin->src[0] = nir_src_for_ssa(&image.image->dest.ssa);
3369 nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(image.image->type));
3370 nir_intrinsic_set_image_array(intrin,
3371 glsl_sampler_type_is_array(image.image->type));
3372
3373 switch (opcode) {
3374 case SpvOpImageQuerySamples:
3375 case SpvOpImageQuerySize:
3376 case SpvOpImageQuerySizeLod:
3377 case SpvOpImageQueryFormat:
3378 case SpvOpImageQueryOrder:
3379 break;
3380 default:
3381 /* The image coordinate is always 4 components but we may not have that
3382 * many. Swizzle to compensate.
3383 */
3384 intrin->src[1] = nir_src_for_ssa(nir_pad_vec4(&b->nb, image.coord));
3385 intrin->src[2] = nir_src_for_ssa(image.sample);
3386 break;
3387 }
3388
3389 /* The Vulkan spec says:
3390 *
3391 * "If an instruction loads from or stores to a resource (including
3392 * atomics and image instructions) and the resource descriptor being
3393 * accessed is not dynamically uniform, then the operand corresponding
3394 * to that resource (e.g. the pointer or sampled image operand) must be
3395 * decorated with NonUniform."
3396 *
3397 * It's very careful to specify that the exact operand must be decorated
3398 * NonUniform. The SPIR-V parser is not expected to chase through long
3399 * chains to find the NonUniform decoration. It's either right there or we
3400 * can assume it doesn't exist.
3401 */
3402 vtn_foreach_decoration(b, res_val, non_uniform_decoration_cb, &access);
3403 nir_intrinsic_set_access(intrin, access);
3404
3405 switch (opcode) {
3406 case SpvOpImageQuerySamples:
3407 case SpvOpImageQueryFormat:
3408 case SpvOpImageQueryOrder:
3409 /* No additional sources */
3410 break;
3411 case SpvOpImageQuerySize:
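/* The size intrinsic always takes an LOD source; the non-Lod query uses
 * LOD 0.
 */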
3412 intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0));
3413 break;
3414 case SpvOpImageQuerySizeLod:
3415 intrin->src[1] = nir_src_for_ssa(image.lod);
3416 break;
3417 case SpvOpAtomicLoad:
3418 case SpvOpImageRead:
3419 case SpvOpImageSparseRead:
3420 /* Only OpImageRead can take a lod parameter (when
3421 * SPV_AMD_shader_image_load_store_lod is used), but the current NIR
3422 * intrinsic definition for atomics requires us to set it for
3423 * OpAtomicLoad as well.
3424 */
3425 intrin->src[3] = nir_src_for_ssa(image.lod);
3426 break;
3427 case SpvOpAtomicStore:
3428 case SpvOpImageWrite: {
3429 const uint32_t value_id = opcode == SpvOpAtomicStore ? w[4] : w[3];
3430 struct vtn_ssa_value *value = vtn_ssa_value(b, value_id);
3431 /* nir_intrinsic_image_deref_store always takes a vec4 value */
3432 assert(op == nir_intrinsic_image_deref_store);
3433 intrin->num_components = 4;
3434 intrin->src[3] = nir_src_for_ssa(nir_pad_vec4(&b->nb, value->def));
3435 /* Only OpImageWrite can take a lod parameter (when
3436 * SPV_AMD_shader_image_load_store_lod is used), but the current NIR
3437 * intrinsic definition for atomics requires us to set it for
3438 * OpAtomicStore as well.
3439 */
3440 intrin->src[4] = nir_src_for_ssa(image.lod);
3441
3442 if (opcode == SpvOpImageWrite) {
3443 nir_alu_type src_type =
3444 get_image_type(b, nir_get_nir_type_for_glsl_type(value->type), operands);
3445 nir_intrinsic_set_src_type(intrin, src_type);
3446 }
3447 break;
3448 }
3449
3450 case SpvOpAtomicCompareExchange:
3451 case SpvOpAtomicCompareExchangeWeak:
3452 case SpvOpAtomicIIncrement:
3453 case SpvOpAtomicIDecrement:
3454 case SpvOpAtomicExchange:
3455 case SpvOpAtomicIAdd:
3456 case SpvOpAtomicISub:
3457 case SpvOpAtomicSMin:
3458 case SpvOpAtomicUMin:
3459 case SpvOpAtomicSMax:
3460 case SpvOpAtomicUMax:
3461 case SpvOpAtomicAnd:
3462 case SpvOpAtomicOr:
3463 case SpvOpAtomicXor:
3464 case SpvOpAtomicFAddEXT:
3465 case SpvOpAtomicFMinEXT:
3466 case SpvOpAtomicFMaxEXT:
3467 fill_common_atomic_sources(b, opcode, w, &intrin->src[3]);
3468 break;
3469
3470 default:
3471 vtn_fail_with_opcode("Invalid image opcode", opcode);
3472 }
3473
3474 /* Image operations implicitly have the Image storage memory semantics. */
3475 semantics |= SpvMemorySemanticsImageMemoryMask;
3476
3477 SpvMemorySemanticsMask before_semantics;
3478 SpvMemorySemanticsMask after_semantics;
3479 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
3480
3481 if (before_semantics)
3482 vtn_emit_memory_barrier(b, scope, before_semantics);
3483
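/* Everything except stores produces a result value. */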
3484 if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) {
3485 struct vtn_type *type = vtn_get_type(b, w[1]);
3486 struct vtn_type *struct_type = NULL;
3487 if (opcode == SpvOpImageSparseRead) {
3488 vtn_assert(glsl_type_is_struct_or_ifc(type->type));
3489 struct_type = type;
3490 type = struct_type->members[1];
3491 }
3492
3493 unsigned dest_components = glsl_get_vector_elements(type->type);
3494 if (opcode == SpvOpImageSparseRead)
3495 dest_components++;
3496
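/* A dest_components of 0 in the intrinsic info means the destination size
 * is variable, so set it from the SPIR-V result type.
 */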
3497 if (nir_intrinsic_infos[op].dest_components == 0)
3498 intrin->num_components = dest_components;
3499
3500 nir_ssa_dest_init(&intrin->instr, &intrin->dest,
3501 nir_intrinsic_dest_components(intrin),
3502 glsl_get_bit_size(type->type), NULL);
3503
3504 nir_builder_instr_insert(&b->nb, &intrin->instr);
3505
3506 nir_ssa_def *result = &intrin->dest.ssa;
3507 if (nir_intrinsic_dest_components(intrin) != dest_components)
3508 result = nir_channels(&b->nb, result, (1 << dest_components) - 1);
3509
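/* For OpImageSparseRead, member 0 of the result struct is the residency
 * code and member 1 is the texel. The sparse_load intrinsic returns the
 * texel in the low channels with the residency code appended as the last
 * channel, so split the result accordingly.
 */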
3510 if (opcode == SpvOpImageSparseRead) {
3511 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3512 unsigned res_type_size = glsl_get_vector_elements(type->type);
3513 dest->elems[0]->def = nir_channel(&b->nb, result, res_type_size);
3514 if (intrin->dest.ssa.bit_size != 32)
3515 dest->elems[0]->def = nir_u2u32(&b->nb, dest->elems[0]->def);
3516 dest->elems[1]->def = nir_channels(&b->nb, result,
3517 BITFIELD_MASK(res_type_size));
3518 vtn_push_ssa_value(b, w[2], dest);
3519 } else {
3520 vtn_push_nir_ssa(b, w[2], result);
3521 }
3522
3523 if (opcode == SpvOpImageRead || opcode == SpvOpImageSparseRead) {
3524 nir_alu_type dest_type =
3525 get_image_type(b, nir_get_nir_type_for_glsl_type(type->type), operands);
3526 nir_intrinsic_set_dest_type(intrin, dest_type);
3527 }
3528 } else {
3529 nir_builder_instr_insert(&b->nb, &intrin->instr);
3530 }
3531
3532 if (after_semantics)
3533 vtn_emit_memory_barrier(b, scope, after_semantics);
3534 }
3535
3536 static nir_intrinsic_op
3537 get_uniform_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
3538 {
3539 switch (opcode) {
3540 #define OP(S, N) case SpvOp##S: return nir_intrinsic_atomic_counter_ ##N;
3541 OP(AtomicLoad, read_deref)
3542 OP(AtomicExchange, exchange)
3543 OP(AtomicCompareExchange, comp_swap)
3544 OP(AtomicCompareExchangeWeak, comp_swap)
3545 OP(AtomicIIncrement, inc_deref)
3546 OP(AtomicIDecrement, post_dec_deref)
3547 OP(AtomicIAdd, add_deref)
3548 OP(AtomicISub, add_deref)
3549 OP(AtomicUMin, min_deref)
3550 OP(AtomicUMax, max_deref)
3551 OP(AtomicAnd, and_deref)
3552 OP(AtomicOr, or_deref)
3553 OP(AtomicXor, xor_deref)
3554 #undef OP
3555 default:
3556 /* We left the following out: AtomicStore, AtomicSMin and
3557 * AtomicSMax. Right now there are no NIR intrinsics for them. At the
3558 * moment, atomic counter support is only needed for ARB_spirv, so we
3559 * only need to support GLSL atomic counters, which are uints and don't
3560 * allow direct storage.
3561 */
3562 vtn_fail("Invalid uniform atomic");
3563 }
3564 }
3565
3566 static nir_intrinsic_op
3567 get_deref_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
3568 {
3569 switch (opcode) {
3570 case SpvOpAtomicLoad: return nir_intrinsic_load_deref;
3571 case SpvOpAtomicFlagClear:
3572 case SpvOpAtomicStore: return nir_intrinsic_store_deref;
3573 #define OP(S, N) case SpvOp##S: return nir_intrinsic_deref_##N;
3574 OP(AtomicExchange, atomic_exchange)
3575 OP(AtomicCompareExchange, atomic_comp_swap)
3576 OP(AtomicCompareExchangeWeak, atomic_comp_swap)
3577 OP(AtomicIIncrement, atomic_add)
3578 OP(AtomicIDecrement, atomic_add)
3579 OP(AtomicIAdd, atomic_add)
3580 OP(AtomicISub, atomic_add)
3581 OP(AtomicSMin, atomic_imin)
3582 OP(AtomicUMin, atomic_umin)
3583 OP(AtomicSMax, atomic_imax)
3584 OP(AtomicUMax, atomic_umax)
3585 OP(AtomicAnd, atomic_and)
3586 OP(AtomicOr, atomic_or)
3587 OP(AtomicXor, atomic_xor)
3588 OP(AtomicFAddEXT, atomic_fadd)
3589 OP(AtomicFMinEXT, atomic_fmin)
3590 OP(AtomicFMaxEXT, atomic_fmax)
3591 OP(AtomicFlagTestAndSet, atomic_comp_swap)
3592 #undef OP
3593 default:
3594 vtn_fail_with_opcode("Invalid shared atomic", opcode);
3595 }
3596 }
3597
3598 /*
3599 * Handles shared atomics, SSBO atomics, and atomic counters.
3600 */
3601 static void
3602 vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
3603 const uint32_t *w, UNUSED unsigned count)
3604 {
3605 struct vtn_pointer *ptr;
3606 nir_intrinsic_instr *atomic;
3607
3608 SpvScope scope = SpvScopeInvocation;
3609 SpvMemorySemanticsMask semantics = 0;
3610 enum gl_access_qualifier access = 0;
3611
3612 switch (opcode) {
3613 case SpvOpAtomicLoad:
3614 case SpvOpAtomicExchange:
3615 case SpvOpAtomicCompareExchange:
3616 case SpvOpAtomicCompareExchangeWeak:
3617 case SpvOpAtomicIIncrement:
3618 case SpvOpAtomicIDecrement:
3619 case SpvOpAtomicIAdd:
3620 case SpvOpAtomicISub:
3621 case SpvOpAtomicSMin:
3622 case SpvOpAtomicUMin:
3623 case SpvOpAtomicSMax:
3624 case SpvOpAtomicUMax:
3625 case SpvOpAtomicAnd:
3626 case SpvOpAtomicOr:
3627 case SpvOpAtomicXor:
3628 case SpvOpAtomicFAddEXT:
3629 case SpvOpAtomicFMinEXT:
3630 case SpvOpAtomicFMaxEXT:
3631 case SpvOpAtomicFlagTestAndSet:
3632 ptr = vtn_pointer(b, w[3]);
3633 scope = vtn_constant_uint(b, w[4]);
3634 semantics = vtn_constant_uint(b, w[5]);
3635 break;
3636 case SpvOpAtomicFlagClear:
3637 case SpvOpAtomicStore:
3638 ptr = vtn_pointer(b, w[1]);
3639 scope = vtn_constant_uint(b, w[2]);
3640 semantics = vtn_constant_uint(b, w[3]);
3641 break;
3642
3643 default:
3644 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3645 }
3646
3647 if (semantics & SpvMemorySemanticsVolatileMask)
3648 access |= ACCESS_VOLATILE;
3649
3650 /* "uniform" here means an atomic counter uniform */
3651 if (ptr->mode == vtn_variable_mode_atomic_counter) {
3652 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
3653 nir_intrinsic_op op = get_uniform_nir_atomic_op(b, opcode);
3654 atomic = nir_intrinsic_instr_create(b->nb.shader, op);
3655 atomic->src[0] = nir_src_for_ssa(&deref->dest.ssa);
3656
3657 /* SSBO atomics need their index/offset initialized. In this case we
3658 * don't, as that info is already stored on the ptr->var->var
3659 * nir_variable (see vtn_create_variable).
3660 */
3661
3662 switch (opcode) {
3663 case SpvOpAtomicLoad:
3664 case SpvOpAtomicExchange:
3665 case SpvOpAtomicCompareExchange:
3666 case SpvOpAtomicCompareExchangeWeak:
3667 case SpvOpAtomicIIncrement:
3668 case SpvOpAtomicIDecrement:
3669 case SpvOpAtomicIAdd:
3670 case SpvOpAtomicISub:
3671 case SpvOpAtomicSMin:
3672 case SpvOpAtomicUMin:
3673 case SpvOpAtomicSMax:
3674 case SpvOpAtomicUMax:
3675 case SpvOpAtomicAnd:
3676 case SpvOpAtomicOr:
3677 case SpvOpAtomicXor:
3678 /* Nothing: we don't need to call fill_common_atomic_sources here, as
3679 * atomic counter uniforms don't have any sources.
3680 */
3681 break;
3682
3683 default:
3684 unreachable("Invalid SPIR-V atomic");
3685
3686 }
3687 } else {
3688 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
3689 const struct glsl_type *deref_type = deref->type;
3690 nir_intrinsic_op op = get_deref_nir_atomic_op(b, opcode);
3691 atomic = nir_intrinsic_instr_create(b->nb.shader, op);
3692 atomic->src[0] = nir_src_for_ssa(&deref->dest.ssa);
3693
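/* Atomics on anything other than Workgroup storage (SSBO, global, etc.)
 * are marked coherent; shared memory is already coherent within the
 * workgroup.
 */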
3694 if (ptr->mode != vtn_variable_mode_workgroup)
3695 access |= ACCESS_COHERENT;
3696
3697 nir_intrinsic_set_access(atomic, access);
3698
3699 switch (opcode) {
3700 case SpvOpAtomicLoad:
3701 atomic->num_components = glsl_get_vector_elements(deref_type);
3702 break;
3703
3704 case SpvOpAtomicStore:
3705 atomic->num_components = glsl_get_vector_elements(deref_type);
3706 nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
3707 atomic->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4]));
3708 break;
3709
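/* The SPIR-V atomic flag is modelled as a 32-bit integer: clear stores 0
 * and test-and-set does a compare-swap of 0 -> ~0, with the old value
 * telling us whether the flag was already set.
 */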
3710 case SpvOpAtomicFlagClear:
3711 atomic->num_components = 1;
3712 nir_intrinsic_set_write_mask(atomic, 1);
3713 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
3714 break;
3715 case SpvOpAtomicFlagTestAndSet:
3716 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
3717 atomic->src[2] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, 32));
3718 break;
3719 case SpvOpAtomicExchange:
3720 case SpvOpAtomicCompareExchange:
3721 case SpvOpAtomicCompareExchangeWeak:
3722 case SpvOpAtomicIIncrement:
3723 case SpvOpAtomicIDecrement:
3724 case SpvOpAtomicIAdd:
3725 case SpvOpAtomicISub:
3726 case SpvOpAtomicSMin:
3727 case SpvOpAtomicUMin:
3728 case SpvOpAtomicSMax:
3729 case SpvOpAtomicUMax:
3730 case SpvOpAtomicAnd:
3731 case SpvOpAtomicOr:
3732 case SpvOpAtomicXor:
3733 case SpvOpAtomicFAddEXT:
3734 case SpvOpAtomicFMinEXT:
3735 case SpvOpAtomicFMaxEXT:
3736 fill_common_atomic_sources(b, opcode, w, &atomic->src[1]);
3737 break;
3738
3739 default:
3740 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3741 }
3742 }
3743
3744 /* Atomic ordering operations will implicitly apply to the atomic operation
3745 * storage class, so include that too.
3746 */
3747 semantics |= vtn_mode_to_memory_semantics(ptr->mode);
3748
3749 SpvMemorySemanticsMask before_semantics;
3750 SpvMemorySemanticsMask after_semantics;
3751 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
3752
3753 if (before_semantics)
3754 vtn_emit_memory_barrier(b, scope, before_semantics);
3755
3756 if (opcode != SpvOpAtomicStore && opcode != SpvOpAtomicFlagClear) {
3757 struct vtn_type *type = vtn_get_type(b, w[1]);
3758
3759 if (opcode == SpvOpAtomicFlagTestAndSet) {
3760 /* Map the atomic flag onto a 32-bit atomic integer. */
3761 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
3762 1, 32, NULL);
3763 } else {
3764 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
3765 glsl_get_vector_elements(type->type),
3766 glsl_get_bit_size(type->type), NULL);
3767
3768 vtn_push_nir_ssa(b, w[2], &atomic->dest.ssa);
3769 }
3770 }
3771
3772 nir_builder_instr_insert(&b->nb, &atomic->instr);
3773
3774 if (opcode == SpvOpAtomicFlagTestAndSet) {
3775 vtn_push_nir_ssa(b, w[2], nir_i2b1(&b->nb, &atomic->dest.ssa));
3776 }
3777 if (after_semantics)
3778 vtn_emit_memory_barrier(b, scope, after_semantics);
3779 }
3780
3781 static nir_alu_instr *
3782 create_vec(struct vtn_builder *b, unsigned num_components, unsigned bit_size)
3783 {
3784 nir_op op = nir_op_vec(num_components);
3785 nir_alu_instr *vec = nir_alu_instr_create(b->shader, op);
3786 nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components,
3787 bit_size, NULL);
3788 vec->dest.write_mask = (1 << num_components) - 1;
3789
3790 return vec;
3791 }
3792
3793 struct vtn_ssa_value *
3794 vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
3795 {
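/* Transposes are cached: dest->transposed points back at src, so
 * transposing twice returns the original value without emitting more code.
 */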
3796 if (src->transposed)
3797 return src->transposed;
3798
3799 struct vtn_ssa_value *dest =
3800 vtn_create_ssa_value(b, glsl_transposed_type(src->type));
3801
3802 for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
3803 nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type),
3804 glsl_get_bit_size(src->type));
3805 if (glsl_type_is_vector_or_scalar(src->type)) {
3806 vec->src[0].src = nir_src_for_ssa(src->def);
3807 vec->src[0].swizzle[0] = i;
3808 } else {
3809 for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) {
3810 vec->src[j].src = nir_src_for_ssa(src->elems[j]->def);
3811 vec->src[j].swizzle[0] = i;
3812 }
3813 }
3814 nir_builder_instr_insert(&b->nb, &vec->instr);
3815 dest->elems[i]->def = &vec->dest.dest.ssa;
3816 }
3817
3818 dest->transposed = src;
3819
3820 return dest;
3821 }
3822
3823 static nir_ssa_def *
3824 vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
3825 nir_ssa_def *src0, nir_ssa_def *src1,
3826 const uint32_t *indices)
3827 {
3828 nir_alu_instr *vec = create_vec(b, num_components, src0->bit_size);
3829
3830 for (unsigned i = 0; i < num_components; i++) {
3831 uint32_t index = indices[i];
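/* In OpVectorShuffle, a component literal of 0xFFFFFFFF means the result
 * component is undefined.
 */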
3832 if (index == 0xffffffff) {
3833 vec->src[i].src =
3834 nir_src_for_ssa(nir_ssa_undef(&b->nb, 1, src0->bit_size));
3835 } else if (index < src0->num_components) {
3836 vec->src[i].src = nir_src_for_ssa(src0);
3837 vec->src[i].swizzle[0] = index;
3838 } else {
3839 vec->src[i].src = nir_src_for_ssa(src1);
3840 vec->src[i].swizzle[0] = index - src0->num_components;
3841 }
3842 }
3843
3844 nir_builder_instr_insert(&b->nb, &vec->instr);
3845
3846 return &vec->dest.dest.ssa;
3847 }
3848
3849 /*
3850 * Concatenates a number of vectors/scalars together to produce a vector.
3851 */
3852 static nir_ssa_def *
3853 vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
3854 unsigned num_srcs, nir_ssa_def **srcs)
3855 {
3856 nir_alu_instr *vec = create_vec(b, num_components, srcs[0]->bit_size);
3857
3858 /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
3859 *
3860 * "When constructing a vector, there must be at least two Constituent
3861 * operands."
3862 */
3863 vtn_assert(num_srcs >= 2);
3864
3865 unsigned dest_idx = 0;
3866 for (unsigned i = 0; i < num_srcs; i++) {
3867 nir_ssa_def *src = srcs[i];
3868 vtn_assert(dest_idx + src->num_components <= num_components);
3869 for (unsigned j = 0; j < src->num_components; j++) {
3870 vec->src[dest_idx].src = nir_src_for_ssa(src);
3871 vec->src[dest_idx].swizzle[0] = j;
3872 dest_idx++;
3873 }
3874 }
3875
3876 /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
3877 *
3878 * "When constructing a vector, the total number of components in all
3879 * the operands must equal the number of components in Result Type."
3880 */
3881 vtn_assert(dest_idx == num_components);
3882
3883 nir_builder_instr_insert(&b->nb, &vec->instr);
3884
3885 return &vec->dest.dest.ssa;
3886 }
3887
3888 static struct vtn_ssa_value *
3889 vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src)
3890 {
3891 struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value);
3892 dest->type = src->type;
3893
3894 if (glsl_type_is_vector_or_scalar(src->type)) {
3895 dest->def = src->def;
3896 } else {
3897 unsigned elems = glsl_get_length(src->type);
3898
3899 dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems);
3900 for (unsigned i = 0; i < elems; i++)
3901 dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]);
3902 }
3903
3904 return dest;
3905 }
3906
3907 static struct vtn_ssa_value *
3908 vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
3909 struct vtn_ssa_value *insert, const uint32_t *indices,
3910 unsigned num_indices)
3911 {
3912 struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
3913
3914 struct vtn_ssa_value *cur = dest;
3915 unsigned i;
3916 for (i = 0; i < num_indices - 1; i++) {
3917 /* If we got a vector here, that means the next index will be trying to
3918 * dereference a scalar.
3919 */
3920 vtn_fail_if(glsl_type_is_vector_or_scalar(cur->type),
3921 "OpCompositeInsert has too many indices.");
3922 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
3923 "All indices in an OpCompositeInsert must be in-bounds");
3924 cur = cur->elems[indices[i]];
3925 }
3926
3927 if (glsl_type_is_vector_or_scalar(cur->type)) {
3928 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
3929 "All indices in an OpCompositeInsert must be in-bounds");
3930
3931 /* According to the SPIR-V spec, OpCompositeInsert may work down to
3932 * the component granularity. In that case, the last index is the
3933 * vector component at which to insert the scalar.
3934 */
3935
3936 cur->def = nir_vector_insert_imm(&b->nb, cur->def, insert->def, indices[i]);
3937 } else {
3938 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
3939 "All indices in an OpCompositeInsert must be in-bounds");
3940 cur->elems[indices[i]] = insert;
3941 }
3942
3943 return dest;
3944 }
3945
3946 static struct vtn_ssa_value *
3947 vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
3948 const uint32_t *indices, unsigned num_indices)
3949 {
3950 struct vtn_ssa_value *cur = src;
3951 for (unsigned i = 0; i < num_indices; i++) {
3952 if (glsl_type_is_vector_or_scalar(cur->type)) {
3953 vtn_assert(i == num_indices - 1);
3954 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
3955 "All indices in an OpCompositeExtract must be in-bounds");
3956
3957 /* According to the SPIR-V spec, OpCompositeExtract may work down to
3958 * the component granularity. In that case, the last index is the
3959 * vector component to extract.
3960 */
3961
3962 const struct glsl_type *scalar_type =
3963 glsl_scalar_type(glsl_get_base_type(cur->type));
3964 struct vtn_ssa_value *ret = vtn_create_ssa_value(b, scalar_type);
3965 ret->def = nir_channel(&b->nb, cur->def, indices[i]);
3966 return ret;
3967 } else {
3968 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
3969 "All indices in an OpCompositeExtract must be in-bounds");
3970 cur = cur->elems[indices[i]];
3971 }
3972 }
3973
3974 return cur;
3975 }
3976
3977 static void
3978 vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
3979 const uint32_t *w, unsigned count)
3980 {
3981 struct vtn_type *type = vtn_get_type(b, w[1]);
3982 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
3983
3984 switch (opcode) {
3985 case SpvOpVectorExtractDynamic:
3986 ssa->def = nir_vector_extract(&b->nb, vtn_get_nir_ssa(b, w[3]),
3987 vtn_get_nir_ssa(b, w[4]));
3988 break;
3989
3990 case SpvOpVectorInsertDynamic:
3991 ssa->def = nir_vector_insert(&b->nb, vtn_get_nir_ssa(b, w[3]),
3992 vtn_get_nir_ssa(b, w[4]),
3993 vtn_get_nir_ssa(b, w[5]));
3994 break;
3995
3996 case SpvOpVectorShuffle:
3997 ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type),
3998 vtn_get_nir_ssa(b, w[3]),
3999 vtn_get_nir_ssa(b, w[4]),
4000 w + 5);
4001 break;
4002
4003 case SpvOpCompositeConstruct: {
4004 unsigned elems = count - 3;
4005 assume(elems >= 1);
4006 if (glsl_type_is_vector_or_scalar(type->type)) {
4007 nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS];
4008 for (unsigned i = 0; i < elems; i++)
4009 srcs[i] = vtn_get_nir_ssa(b, w[3 + i]);
4010 ssa->def =
4011 vtn_vector_construct(b, glsl_get_vector_elements(type->type),
4012 elems, srcs);
4013 } else {
4014 ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
4015 for (unsigned i = 0; i < elems; i++)
4016 ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
4017 }
4018 break;
4019 }
4020 case SpvOpCompositeExtract:
4021 ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
4022 w + 4, count - 4);
4023 break;
4024
4025 case SpvOpCompositeInsert:
4026 ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
4027 vtn_ssa_value(b, w[3]),
4028 w + 5, count - 5);
4029 break;
4030
4031 case SpvOpCopyLogical:
4032 ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
4033 break;
4034 case SpvOpCopyObject:
4035 vtn_copy_value(b, w[3], w[2]);
4036 return;
4037
4038 default:
4039 vtn_fail_with_opcode("unknown composite operation", opcode);
4040 }
4041
4042 vtn_push_ssa_value(b, w[2], ssa);
4043 }
4044
4045 void
4046 vtn_emit_memory_barrier(struct vtn_builder *b, SpvScope scope,
4047 SpvMemorySemanticsMask semantics)
4048 {
4049 if (b->shader->options->use_scoped_barrier) {
4050 vtn_emit_scoped_memory_barrier(b, scope, semantics);
4051 return;
4052 }
4053
4054 static const SpvMemorySemanticsMask all_memory_semantics =
4055 SpvMemorySemanticsUniformMemoryMask |
4056 SpvMemorySemanticsWorkgroupMemoryMask |
4057 SpvMemorySemanticsAtomicCounterMemoryMask |
4058 SpvMemorySemanticsImageMemoryMask |
4059 SpvMemorySemanticsOutputMemoryMask;
4060
4061 /* If we're not actually doing a memory barrier, bail */
4062 if (!(semantics & all_memory_semantics))
4063 return;
4064
4065 /* GL and Vulkan don't have these */
4066 vtn_assert(scope != SpvScopeCrossDevice);
4067
4068 if (scope == SpvScopeSubgroup)
4069 return; /* Nothing to do here */
4070
4071 if (scope == SpvScopeWorkgroup) {
4072 nir_group_memory_barrier(&b->nb);
4073 return;
4074 }
4075
4076 /* There are only two scopes left */
4077 vtn_assert(scope == SpvScopeInvocation || scope == SpvScopeDevice);
4078
4079 /* Map the GLSL memoryBarrier() construct and any barriers with more than one
4080 * semantic to the corresponding NIR one.
4081 */
4082 if (util_bitcount(semantics & all_memory_semantics) > 1) {
4083 nir_memory_barrier(&b->nb);
4084 if (semantics & SpvMemorySemanticsOutputMemoryMask) {
4085 /* GLSL memoryBarrier() (and the corresponding NIR one) doesn't include
4086 * TCS outputs, so we have to emit a separate intrinsic for those. We
4087 * then need another memory_barrier to prevent non-output operations
4088 * from being moved before the tcs_patch barrier.
4089 */
4090 nir_memory_barrier_tcs_patch(&b->nb);
4091 nir_memory_barrier(&b->nb);
4092 }
4093 return;
4094 }
4095
4096 /* Issue a more specific barrier */
4097 switch (semantics & all_memory_semantics) {
4098 case SpvMemorySemanticsUniformMemoryMask:
4099 nir_memory_barrier_buffer(&b->nb);
4100 break;
4101 case SpvMemorySemanticsWorkgroupMemoryMask:
4102 nir_memory_barrier_shared(&b->nb);
4103 break;
4104 case SpvMemorySemanticsAtomicCounterMemoryMask:
4105 nir_memory_barrier_atomic_counter(&b->nb);
4106 break;
4107 case SpvMemorySemanticsImageMemoryMask:
4108 nir_memory_barrier_image(&b->nb);
4109 break;
4110 case SpvMemorySemanticsOutputMemoryMask:
4111 if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL)
4112 nir_memory_barrier_tcs_patch(&b->nb);
4113 break;
4114 default:
4115 break;
4116 }
4117 }
4118
4119 static void
4120 vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
4121 const uint32_t *w, UNUSED unsigned count)
4122 {
4123 switch (opcode) {
4124 case SpvOpEmitVertex:
4125 case SpvOpEmitStreamVertex:
4126 case SpvOpEndPrimitive:
4127 case SpvOpEndStreamPrimitive: {
4128 unsigned stream = 0;
4129 if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
4130 stream = vtn_constant_uint(b, w[1]);
4131
4132 switch (opcode) {
4133 case SpvOpEmitStreamVertex:
4134 case SpvOpEmitVertex:
4135 nir_emit_vertex(&b->nb, stream);
4136 break;
4137 case SpvOpEndPrimitive:
4138 case SpvOpEndStreamPrimitive:
4139 nir_end_primitive(&b->nb, stream);
4140 break;
4141 default:
4142 unreachable("Invalid opcode");
4143 }
4144 break;
4145 }
4146
4147 case SpvOpMemoryBarrier: {
4148 SpvScope scope = vtn_constant_uint(b, w[1]);
4149 SpvMemorySemanticsMask semantics = vtn_constant_uint(b, w[2]);
4150 vtn_emit_memory_barrier(b, scope, semantics);
4151 return;
4152 }
4153
4154 case SpvOpControlBarrier: {
4155 SpvScope execution_scope = vtn_constant_uint(b, w[1]);
4156 SpvScope memory_scope = vtn_constant_uint(b, w[2]);
4157 SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]);
4158
4159 /* GLSLang, prior to commit 8297936dd6eb3, emitted OpControlBarrier with
4160 * memory semantics of None for GLSL barrier().
4161 * And even earlier, prior to c3f1cdfa, it emitted OpControlBarrier with
4162 * Device instead of Workgroup as the execution scope.
4163 */
4164 if (b->wa_glslang_cs_barrier &&
4165 b->nb.shader->info.stage == MESA_SHADER_COMPUTE &&
4166 (execution_scope == SpvScopeWorkgroup ||
4167 execution_scope == SpvScopeDevice) &&
4168 memory_semantics == SpvMemorySemanticsMaskNone) {
4169 execution_scope = SpvScopeWorkgroup;
4170 memory_scope = SpvScopeWorkgroup;
4171 memory_semantics = SpvMemorySemanticsAcquireReleaseMask |
4172 SpvMemorySemanticsWorkgroupMemoryMask;
4173 }
4174
4175 /* From the SPIR-V spec:
4176 *
4177 * "When used with the TessellationControl execution model, it also
4178 * implicitly synchronizes the Output Storage Class: Writes to Output
4179 * variables performed by any invocation executed prior to a
4180 * OpControlBarrier will be visible to any other invocation after
4181 * return from that OpControlBarrier."
4182 *
4183 * The same applies to VK_NV_mesh_shader.
4184 */
4185 if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL ||
4186 b->nb.shader->info.stage == MESA_SHADER_TASK ||
4187 b->nb.shader->info.stage == MESA_SHADER_MESH) {
4188 memory_semantics &= ~(SpvMemorySemanticsAcquireMask |
4189 SpvMemorySemanticsReleaseMask |
4190 SpvMemorySemanticsAcquireReleaseMask |
4191 SpvMemorySemanticsSequentiallyConsistentMask);
4192 memory_semantics |= SpvMemorySemanticsAcquireReleaseMask |
4193 SpvMemorySemanticsOutputMemoryMask;
4194 }
4195
4196 if (b->shader->options->use_scoped_barrier) {
4197 vtn_emit_scoped_control_barrier(b, execution_scope, memory_scope,
4198 memory_semantics);
4199 } else {
4200 vtn_emit_memory_barrier(b, memory_scope, memory_semantics);
4201
4202 if (execution_scope == SpvScopeWorkgroup)
4203 nir_control_barrier(&b->nb);
4204 }
4205 break;
4206 }
4207
4208 default:
4209 unreachable("unknown barrier instruction");
4210 }
4211 }
4212
4213 static unsigned
4214 gl_primitive_from_spv_execution_mode(struct vtn_builder *b,
4215 SpvExecutionMode mode)
4216 {
4217 switch (mode) {
4218 case SpvExecutionModeInputPoints:
4219 case SpvExecutionModeOutputPoints:
4220 return 0; /* GL_POINTS */
4221 case SpvExecutionModeInputLines:
4222 case SpvExecutionModeOutputLinesNV:
4223 return 1; /* GL_LINES */
4224 case SpvExecutionModeInputLinesAdjacency:
4225 return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */
4226 case SpvExecutionModeTriangles:
4227 case SpvExecutionModeOutputTrianglesNV:
4228 return 4; /* GL_TRIANGLES */
4229 case SpvExecutionModeInputTrianglesAdjacency:
4230 return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */
4231 case SpvExecutionModeQuads:
4232 return 7; /* GL_QUADS */
4233 case SpvExecutionModeIsolines:
4234 return 0x8E7A; /* GL_ISOLINES */
4235 case SpvExecutionModeOutputLineStrip:
4236 return 3; /* GL_LINE_STRIP */
4237 case SpvExecutionModeOutputTriangleStrip:
4238 return 5; /* GL_TRIANGLE_STRIP */
4239 default:
4240 vtn_fail("Invalid primitive type: %s (%u)",
4241 spirv_executionmode_to_string(mode), mode);
4242 }
4243 }
4244
4245 static unsigned
4246 vertices_in_from_spv_execution_mode(struct vtn_builder *b,
4247 SpvExecutionMode mode)
4248 {
4249 switch (mode) {
4250 case SpvExecutionModeInputPoints:
4251 return 1;
4252 case SpvExecutionModeInputLines:
4253 return 2;
4254 case SpvExecutionModeInputLinesAdjacency:
4255 return 4;
4256 case SpvExecutionModeTriangles:
4257 return 3;
4258 case SpvExecutionModeInputTrianglesAdjacency:
4259 return 6;
4260 default:
4261 vtn_fail("Invalid GS input mode: %s (%u)",
4262 spirv_executionmode_to_string(mode), mode);
4263 }
4264 }
4265
4266 static gl_shader_stage
4267 stage_for_execution_model(struct vtn_builder *b, SpvExecutionModel model)
4268 {
4269 switch (model) {
4270 case SpvExecutionModelVertex:
4271 return MESA_SHADER_VERTEX;
4272 case SpvExecutionModelTessellationControl:
4273 return MESA_SHADER_TESS_CTRL;
4274 case SpvExecutionModelTessellationEvaluation:
4275 return MESA_SHADER_TESS_EVAL;
4276 case SpvExecutionModelGeometry:
4277 return MESA_SHADER_GEOMETRY;
4278 case SpvExecutionModelFragment:
4279 return MESA_SHADER_FRAGMENT;
4280 case SpvExecutionModelGLCompute:
4281 return MESA_SHADER_COMPUTE;
4282 case SpvExecutionModelKernel:
4283 return MESA_SHADER_KERNEL;
4284 case SpvExecutionModelRayGenerationKHR:
4285 return MESA_SHADER_RAYGEN;
4286 case SpvExecutionModelAnyHitKHR:
4287 return MESA_SHADER_ANY_HIT;
4288 case SpvExecutionModelClosestHitKHR:
4289 return MESA_SHADER_CLOSEST_HIT;
4290 case SpvExecutionModelMissKHR:
4291 return MESA_SHADER_MISS;
4292 case SpvExecutionModelIntersectionKHR:
4293 return MESA_SHADER_INTERSECTION;
4294 case SpvExecutionModelCallableKHR:
4295 return MESA_SHADER_CALLABLE;
4296 case SpvExecutionModelTaskNV:
4297 return MESA_SHADER_TASK;
4298 case SpvExecutionModelMeshNV:
4299 return MESA_SHADER_MESH;
4300 default:
4301 vtn_fail("Unsupported execution model: %s (%u)",
4302 spirv_executionmodel_to_string(model), model);
4303 }
4304 }
4305
4306 #define spv_check_supported(name, cap) do { \
4307 if (!(b->options && b->options->caps.name)) \
4308 vtn_warn("Unsupported SPIR-V capability: %s (%u)", \
4309 spirv_capability_to_string(cap), cap); \
4310 } while(0)
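/* Note that an unsupported capability only produces a warning, so
 * translation continues on a best-effort basis rather than failing.
 */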
4311
4312
4313 void
4314 vtn_handle_entry_point(struct vtn_builder *b, const uint32_t *w,
4315 unsigned count)
4316 {
4317 struct vtn_value *entry_point = &b->values[w[2]];
4318 /* Record the name regardless of whether this is the entry point we want */
4319 unsigned name_words;
4320 entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
4321
4322 if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
4323 stage_for_execution_model(b, w[1]) != b->entry_point_stage)
4324 return;
4325
4326 vtn_assert(b->entry_point == NULL);
4327 b->entry_point = entry_point;
4328
4329 /* Entry points enumerate which global variables are used. */
4330 size_t start = 3 + name_words;
4331 b->interface_ids_count = count - start;
4332 b->interface_ids = ralloc_array(b, uint32_t, b->interface_ids_count);
4333 memcpy(b->interface_ids, &w[start], b->interface_ids_count * 4);
4334 qsort(b->interface_ids, b->interface_ids_count, 4, cmp_uint32_t);
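/* The interface IDs are kept sorted, presumably so that later membership
 * checks can binary-search the array.
 */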
4335 }
4336
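/* Returns true while we are still in the module preamble; the first
 * instruction not handled here ends the preamble and is presumably handed
 * off to the next pass by the caller.
 */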
4337 static bool
4338 vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
4339 const uint32_t *w, unsigned count)
4340 {
4341 switch (opcode) {
4342 case SpvOpSource: {
4343 const char *lang;
4344 switch (w[1]) {
4345 default:
4346 case SpvSourceLanguageUnknown: lang = "unknown"; break;
4347 case SpvSourceLanguageESSL: lang = "ESSL"; break;
4348 case SpvSourceLanguageGLSL: lang = "GLSL"; break;
4349 case SpvSourceLanguageOpenCL_C: lang = "OpenCL C"; break;
4350 case SpvSourceLanguageOpenCL_CPP: lang = "OpenCL C++"; break;
4351 case SpvSourceLanguageHLSL: lang = "HLSL"; break;
4352 }
4353
4354 uint32_t version = w[2];
4355
4356 const char *file =
4357 (count > 3) ? vtn_value(b, w[3], vtn_value_type_string)->str : "";
4358
4359 vtn_info("Parsing SPIR-V from %s %u source file %s", lang, version, file);
4360
4361 b->source_lang = w[1];
4362 break;
4363 }
4364
4365 case SpvOpSourceExtension:
4366 case SpvOpSourceContinued:
4367 case SpvOpExtension:
4368 case SpvOpModuleProcessed:
4369 /* Unhandled, but these are for debug so that's ok. */
4370 break;
4371
4372 case SpvOpCapability: {
4373 SpvCapability cap = w[1];
4374 switch (cap) {
4375 case SpvCapabilityMatrix:
4376 case SpvCapabilityShader:
4377 case SpvCapabilityGeometry:
4378 case SpvCapabilityGeometryPointSize:
4379 case SpvCapabilityUniformBufferArrayDynamicIndexing:
4380 case SpvCapabilitySampledImageArrayDynamicIndexing:
4381 case SpvCapabilityStorageBufferArrayDynamicIndexing:
4382 case SpvCapabilityStorageImageArrayDynamicIndexing:
4383 case SpvCapabilityImageRect:
4384 case SpvCapabilitySampledRect:
4385 case SpvCapabilitySampled1D:
4386 case SpvCapabilityImage1D:
4387 case SpvCapabilitySampledCubeArray:
4388 case SpvCapabilityImageCubeArray:
4389 case SpvCapabilitySampledBuffer:
4390 case SpvCapabilityImageBuffer:
4391 case SpvCapabilityImageQuery:
4392 case SpvCapabilityDerivativeControl:
4393 case SpvCapabilityInterpolationFunction:
4394 case SpvCapabilityMultiViewport:
4395 case SpvCapabilitySampleRateShading:
4396 case SpvCapabilityClipDistance:
4397 case SpvCapabilityCullDistance:
4398 case SpvCapabilityInputAttachment:
4399 case SpvCapabilityImageGatherExtended:
4400 case SpvCapabilityStorageImageExtendedFormats:
4401 case SpvCapabilityVector16:
4402 case SpvCapabilityDotProductKHR:
4403 case SpvCapabilityDotProductInputAllKHR:
4404 case SpvCapabilityDotProductInput4x8BitKHR:
4405 case SpvCapabilityDotProductInput4x8BitPackedKHR:
4406 break;
4407
4408 case SpvCapabilityLinkage:
4409 if (!b->options->create_library)
4410 vtn_warn("Unsupported SPIR-V capability: %s",
4411 spirv_capability_to_string(cap));
4412 break;
4413
4414 case SpvCapabilitySparseResidency:
4415 spv_check_supported(sparse_residency, cap);
4416 break;
4417
4418 case SpvCapabilityMinLod:
4419 spv_check_supported(min_lod, cap);
4420 break;
4421
4422 case SpvCapabilityAtomicStorage:
4423 spv_check_supported(atomic_storage, cap);
4424 break;
4425
4426 case SpvCapabilityFloat64:
4427 spv_check_supported(float64, cap);
4428 break;
4429 case SpvCapabilityInt64:
4430 spv_check_supported(int64, cap);
4431 break;
4432 case SpvCapabilityInt16:
4433 spv_check_supported(int16, cap);
4434 break;
4435 case SpvCapabilityInt8:
4436 spv_check_supported(int8, cap);
4437 break;
4438
4439 case SpvCapabilityTransformFeedback:
4440 spv_check_supported(transform_feedback, cap);
4441 break;
4442
4443 case SpvCapabilityGeometryStreams:
4444 spv_check_supported(geometry_streams, cap);
4445 break;
4446
4447 case SpvCapabilityInt64Atomics:
4448 spv_check_supported(int64_atomics, cap);
4449 break;
4450
4451 case SpvCapabilityStorageImageMultisample:
4452 spv_check_supported(storage_image_ms, cap);
4453 break;
4454
4455 case SpvCapabilityAddresses:
4456 spv_check_supported(address, cap);
4457 break;
4458
4459 case SpvCapabilityKernel:
4460 case SpvCapabilityFloat16Buffer:
4461 spv_check_supported(kernel, cap);
4462 break;
4463
4464 case SpvCapabilityGenericPointer:
4465 spv_check_supported(generic_pointers, cap);
4466 break;
4467
4468 case SpvCapabilityImageBasic:
4469 spv_check_supported(kernel_image, cap);
4470 break;
4471
4472 case SpvCapabilityImageReadWrite:
4473 spv_check_supported(kernel_image_read_write, cap);
4474 break;
4475
4476 case SpvCapabilityLiteralSampler:
4477 spv_check_supported(literal_sampler, cap);
4478 break;
4479
4480 case SpvCapabilityImageMipmap:
4481 case SpvCapabilityPipes:
4482 case SpvCapabilityDeviceEnqueue:
4483 vtn_warn("Unsupported OpenCL-style SPIR-V capability: %s",
4484 spirv_capability_to_string(cap));
4485 break;
4486
4487 case SpvCapabilityImageMSArray:
4488 spv_check_supported(image_ms_array, cap);
4489 break;
4490
4491 case SpvCapabilityTessellation:
4492 case SpvCapabilityTessellationPointSize:
4493 spv_check_supported(tessellation, cap);
4494 break;
4495
4496 case SpvCapabilityDrawParameters:
4497 spv_check_supported(draw_parameters, cap);
4498 break;
4499
4500 case SpvCapabilityStorageImageReadWithoutFormat:
4501 spv_check_supported(image_read_without_format, cap);
4502 break;
4503
4504 case SpvCapabilityStorageImageWriteWithoutFormat:
4505 spv_check_supported(image_write_without_format, cap);
4506 break;
4507
4508 case SpvCapabilityDeviceGroup:
4509 spv_check_supported(device_group, cap);
4510 break;
4511
4512 case SpvCapabilityMultiView:
4513 spv_check_supported(multiview, cap);
4514 break;
4515
4516 case SpvCapabilityGroupNonUniform:
4517 spv_check_supported(subgroup_basic, cap);
4518 break;
4519
4520 case SpvCapabilitySubgroupVoteKHR:
4521 case SpvCapabilityGroupNonUniformVote:
4522 spv_check_supported(subgroup_vote, cap);
4523 break;
4524
4525 case SpvCapabilitySubgroupBallotKHR:
4526 case SpvCapabilityGroupNonUniformBallot:
4527 spv_check_supported(subgroup_ballot, cap);
4528 break;
4529
4530 case SpvCapabilityGroupNonUniformShuffle:
4531 case SpvCapabilityGroupNonUniformShuffleRelative:
4532 spv_check_supported(subgroup_shuffle, cap);
4533 break;
4534
4535 case SpvCapabilityGroupNonUniformQuad:
4536 spv_check_supported(subgroup_quad, cap);
4537 break;
4538
4539 case SpvCapabilityGroupNonUniformArithmetic:
4540 case SpvCapabilityGroupNonUniformClustered:
4541 spv_check_supported(subgroup_arithmetic, cap);
4542 break;
4543
4544 case SpvCapabilityGroups:
4545 spv_check_supported(groups, cap);
4546 break;
4547
4548 case SpvCapabilitySubgroupDispatch:
4549 spv_check_supported(subgroup_dispatch, cap);
4550 /* Missing:
4551 * - SpvOpGetKernelLocalSizeForSubgroupCount
4552 * - SpvOpGetKernelMaxNumSubgroups
4553 * - SpvExecutionModeSubgroupsPerWorkgroup
4554 * - SpvExecutionModeSubgroupsPerWorkgroupId
4555 */
4556 vtn_warn("Not fully supported capability: %s",
4557 spirv_capability_to_string(cap));
4558 break;
4559
4560 case SpvCapabilityVariablePointersStorageBuffer:
4561 case SpvCapabilityVariablePointers:
4562 spv_check_supported(variable_pointers, cap);
4563 b->variable_pointers = true;
4564 break;
4565
4566 case SpvCapabilityStorageUniformBufferBlock16:
4567 case SpvCapabilityStorageUniform16:
4568 case SpvCapabilityStoragePushConstant16:
4569 case SpvCapabilityStorageInputOutput16:
4570 spv_check_supported(storage_16bit, cap);
4571 break;
4572
4573 case SpvCapabilityShaderLayer:
4574 case SpvCapabilityShaderViewportIndex:
4575 case SpvCapabilityShaderViewportIndexLayerEXT:
4576 spv_check_supported(shader_viewport_index_layer, cap);
4577 break;
4578
4579 case SpvCapabilityStorageBuffer8BitAccess:
4580 case SpvCapabilityUniformAndStorageBuffer8BitAccess:
4581 case SpvCapabilityStoragePushConstant8:
4582 spv_check_supported(storage_8bit, cap);
4583 break;
4584
4585 case SpvCapabilityShaderNonUniformEXT:
4586 spv_check_supported(descriptor_indexing, cap);
4587 break;
4588
4589 case SpvCapabilityInputAttachmentArrayDynamicIndexingEXT:
4590 case SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT:
4591 case SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT:
4592 spv_check_supported(descriptor_array_dynamic_indexing, cap);
4593 break;
4594
4595 case SpvCapabilityUniformBufferArrayNonUniformIndexingEXT:
4596 case SpvCapabilitySampledImageArrayNonUniformIndexingEXT:
4597 case SpvCapabilityStorageBufferArrayNonUniformIndexingEXT:
4598 case SpvCapabilityStorageImageArrayNonUniformIndexingEXT:
4599 case SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT:
4600 case SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT:
4601 case SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT:
4602 spv_check_supported(descriptor_array_non_uniform_indexing, cap);
4603 break;
4604
4605 case SpvCapabilityRuntimeDescriptorArrayEXT:
4606 spv_check_supported(runtime_descriptor_array, cap);
4607 break;
4608
4609 case SpvCapabilityStencilExportEXT:
4610 spv_check_supported(stencil_export, cap);
4611 break;
4612
4613 case SpvCapabilitySampleMaskPostDepthCoverage:
4614 spv_check_supported(post_depth_coverage, cap);
4615 break;
4616
4617 case SpvCapabilityDenormFlushToZero:
4618 case SpvCapabilityDenormPreserve:
4619 case SpvCapabilitySignedZeroInfNanPreserve:
4620 case SpvCapabilityRoundingModeRTE:
4621 case SpvCapabilityRoundingModeRTZ:
4622 spv_check_supported(float_controls, cap);
4623 break;
4624
4625 case SpvCapabilityPhysicalStorageBufferAddresses:
4626 spv_check_supported(physical_storage_buffer_address, cap);
4627 break;
4628
4629 case SpvCapabilityComputeDerivativeGroupQuadsNV:
4630 case SpvCapabilityComputeDerivativeGroupLinearNV:
4631 spv_check_supported(derivative_group, cap);
4632 break;
4633
4634 case SpvCapabilityFloat16:
4635 spv_check_supported(float16, cap);
4636 break;
4637
4638 case SpvCapabilityFragmentShaderSampleInterlockEXT:
4639 spv_check_supported(fragment_shader_sample_interlock, cap);
4640 break;
4641
4642 case SpvCapabilityFragmentShaderPixelInterlockEXT:
4643 spv_check_supported(fragment_shader_pixel_interlock, cap);
4644 break;
4645
4646 case SpvCapabilityDemoteToHelperInvocationEXT:
4647 spv_check_supported(demote_to_helper_invocation, cap);
4648 b->uses_demote_to_helper_invocation = true;
4649 break;
4650
4651 case SpvCapabilityShaderClockKHR:
4652 spv_check_supported(shader_clock, cap);
4653 break;
4654
4655 case SpvCapabilityVulkanMemoryModel:
4656 spv_check_supported(vk_memory_model, cap);
4657 break;
4658
4659 case SpvCapabilityVulkanMemoryModelDeviceScope:
4660 spv_check_supported(vk_memory_model_device_scope, cap);
4661 break;
4662
4663 case SpvCapabilityImageReadWriteLodAMD:
4664 spv_check_supported(amd_image_read_write_lod, cap);
4665 break;
4666
4667 case SpvCapabilityIntegerFunctions2INTEL:
4668 spv_check_supported(integer_functions2, cap);
4669 break;
4670
4671 case SpvCapabilityFragmentMaskAMD:
4672 spv_check_supported(amd_fragment_mask, cap);
4673 break;
4674
4675 case SpvCapabilityImageGatherBiasLodAMD:
4676 spv_check_supported(amd_image_gather_bias_lod, cap);
4677 break;
4678
4679 case SpvCapabilityAtomicFloat16AddEXT:
4680 spv_check_supported(float16_atomic_add, cap);
4681 break;
4682
4683 case SpvCapabilityAtomicFloat32AddEXT:
4684 spv_check_supported(float32_atomic_add, cap);
4685 break;
4686
4687 case SpvCapabilityAtomicFloat64AddEXT:
4688 spv_check_supported(float64_atomic_add, cap);
4689 break;
4690
4691 case SpvCapabilitySubgroupShuffleINTEL:
4692 spv_check_supported(intel_subgroup_shuffle, cap);
4693 break;
4694
4695 case SpvCapabilitySubgroupBufferBlockIOINTEL:
4696 spv_check_supported(intel_subgroup_buffer_block_io, cap);
4697 break;
4698
4699 case SpvCapabilityRayTracingKHR:
4700 spv_check_supported(ray_tracing, cap);
4701 break;
4702
4703 case SpvCapabilityRayQueryKHR:
4704 spv_check_supported(ray_query, cap);
4705 break;
4706
4707 case SpvCapabilityRayTraversalPrimitiveCullingKHR:
4708 spv_check_supported(ray_traversal_primitive_culling, cap);
4709 break;
4710
4711 case SpvCapabilityInt64ImageEXT:
4712 spv_check_supported(image_atomic_int64, cap);
4713 break;
4714
4715 case SpvCapabilityFragmentShadingRateKHR:
4716 spv_check_supported(fragment_shading_rate, cap);
4717 break;
4718
4719 case SpvCapabilityWorkgroupMemoryExplicitLayoutKHR:
4720 spv_check_supported(workgroup_memory_explicit_layout, cap);
4721 break;
4722
4723 case SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR:
4724 spv_check_supported(workgroup_memory_explicit_layout, cap);
4725 spv_check_supported(storage_8bit, cap);
4726 break;
4727
4728 case SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR:
4729 spv_check_supported(workgroup_memory_explicit_layout, cap);
4730 spv_check_supported(storage_16bit, cap);
4731 break;
4732
4733 case SpvCapabilityAtomicFloat16MinMaxEXT:
4734 spv_check_supported(float16_atomic_min_max, cap);
4735 break;
4736
4737 case SpvCapabilityAtomicFloat32MinMaxEXT:
4738 spv_check_supported(float32_atomic_min_max, cap);
4739 break;
4740
4741 case SpvCapabilityAtomicFloat64MinMaxEXT:
4742 spv_check_supported(float64_atomic_min_max, cap);
4743 break;
4744
4745 case SpvCapabilityMeshShadingNV:
4746 spv_check_supported(mesh_shading_nv, cap);
4747 break;
4748
4749 default:
4750 vtn_fail("Unhandled capability: %s (%u)",
4751 spirv_capability_to_string(cap), cap);
4752 }
4753 break;
4754 }
4755
4756 case SpvOpExtInstImport:
4757 vtn_handle_extension(b, opcode, w, count);
4758 break;
4759
4760 case SpvOpMemoryModel:
4761 switch (w[1]) {
4762 case SpvAddressingModelPhysical32:
4763 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
4764 "AddressingModelPhysical32 only supported for kernels");
4765 b->shader->info.cs.ptr_size = 32;
4766 b->physical_ptrs = true;
4767 assert(nir_address_format_bit_size(b->options->global_addr_format) == 32);
4768 assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
4769 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 32);
4770 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
4771 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 32);
4772 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
4773 break;
4774 case SpvAddressingModelPhysical64:
4775 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
4776 "AddressingModelPhysical64 only supported for kernels");
4777 b->shader->info.cs.ptr_size = 64;
4778 b->physical_ptrs = true;
4779 assert(nir_address_format_bit_size(b->options->global_addr_format) == 64);
4780 assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
4781 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 64);
4782 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
4783 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 64);
4784 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
4785 break;
4786 case SpvAddressingModelLogical:
4787 vtn_fail_if(b->shader->info.stage == MESA_SHADER_KERNEL,
4788 "AddressingModelLogical only supported for shaders");
4789 b->physical_ptrs = false;
4790 break;
4791 case SpvAddressingModelPhysicalStorageBuffer64:
4792 vtn_fail_if(!b->options ||
4793 !b->options->caps.physical_storage_buffer_address,
4794 "AddressingModelPhysicalStorageBuffer64 not supported");
4795 break;
4796 default:
4797 vtn_fail("Unknown addressing model: %s (%u)",
4798 spirv_addressingmodel_to_string(w[1]), w[1]);
4799 break;
4800 }
4801
4802 b->mem_model = w[2];
4803 switch (w[2]) {
4804 case SpvMemoryModelSimple:
4805 case SpvMemoryModelGLSL450:
4806 case SpvMemoryModelOpenCL:
4807 break;
4808 case SpvMemoryModelVulkan:
4809 vtn_fail_if(!b->options->caps.vk_memory_model,
4810 "Vulkan memory model is unsupported by this driver");
4811 break;
4812 default:
4813 vtn_fail("Unsupported memory model: %s",
4814 spirv_memorymodel_to_string(w[2]));
4815 break;
4816 }
4817 break;
4818
4819 case SpvOpEntryPoint:
4820 vtn_handle_entry_point(b, w, count);
4821 break;
4822
4823 case SpvOpString:
4824 vtn_push_value(b, w[1], vtn_value_type_string)->str =
4825 vtn_string_literal(b, &w[2], count - 2, NULL);
4826 break;
4827
4828 case SpvOpName:
4829 b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
4830 break;
4831
4832 case SpvOpMemberName:
4833 /* TODO */
4834 break;
4835
4836 case SpvOpExecutionMode:
4837 case SpvOpExecutionModeId:
4838 case SpvOpDecorationGroup:
4839 case SpvOpDecorate:
4840 case SpvOpDecorateId:
4841 case SpvOpMemberDecorate:
4842 case SpvOpGroupDecorate:
4843 case SpvOpGroupMemberDecorate:
4844 case SpvOpDecorateString:
4845 case SpvOpMemberDecorateString:
4846 vtn_handle_decoration(b, opcode, w, count);
4847 break;
4848
4849 case SpvOpExtInst: {
4850 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
4851 if (val->ext_handler == vtn_handle_non_semantic_instruction) {
4852 /* NonSemantic extended instructions are acceptable in preamble. */
4853 vtn_handle_non_semantic_instruction(b, w[4], w, count);
4854 return true;
4855 } else {
4856 return false; /* End of preamble. */
4857 }
4858 }
4859
4860 default:
4861 return false; /* End of preamble */
4862 }
4863
4864 return true;
4865 }
4866
4867 static void
4868 vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
4869 const struct vtn_decoration *mode, UNUSED void *data)
4870 {
4871 vtn_assert(b->entry_point == entry_point);
4872
4873 switch(mode->exec_mode) {
4874 case SpvExecutionModeOriginUpperLeft:
4875 case SpvExecutionModeOriginLowerLeft:
4876 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4877 b->shader->info.fs.origin_upper_left =
4878 (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
4879 break;
4880
4881 case SpvExecutionModeEarlyFragmentTests:
4882 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4883 b->shader->info.fs.early_fragment_tests = true;
4884 break;
4885
4886 case SpvExecutionModePostDepthCoverage:
4887 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4888 b->shader->info.fs.post_depth_coverage = true;
4889 break;
4890
4891 case SpvExecutionModeInvocations:
4892 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
4893 b->shader->info.gs.invocations = MAX2(1, mode->operands[0]);
4894 break;
4895
4896 case SpvExecutionModeDepthReplacing:
4897 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4898 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
4899 break;
4900 case SpvExecutionModeDepthGreater:
4901 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4902 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
4903 break;
4904 case SpvExecutionModeDepthLess:
4905 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4906 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
4907 break;
4908 case SpvExecutionModeDepthUnchanged:
4909 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4910 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
4911 break;
4912
4913 case SpvExecutionModeLocalSizeHint:
4914 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
4915 b->shader->info.cs.workgroup_size_hint[0] = mode->operands[0];
4916 b->shader->info.cs.workgroup_size_hint[1] = mode->operands[1];
4917 b->shader->info.cs.workgroup_size_hint[2] = mode->operands[2];
4918 break;
4919
4920 case SpvExecutionModeLocalSize:
4921 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
4922 b->shader->info.workgroup_size[0] = mode->operands[0];
4923 b->shader->info.workgroup_size[1] = mode->operands[1];
4924 b->shader->info.workgroup_size[2] = mode->operands[2];
4925 } else {
4926 vtn_fail("Execution mode LocalSize not supported in stage %s",
4927 _mesa_shader_stage_to_string(b->shader->info.stage));
4928 }
4929 break;
4930
4931 case SpvExecutionModeOutputVertices:
4932 switch (b->shader->info.stage) {
4933 case MESA_SHADER_TESS_CTRL:
4934 case MESA_SHADER_TESS_EVAL:
4935 b->shader->info.tess.tcs_vertices_out = mode->operands[0];
4936 break;
4937 case MESA_SHADER_GEOMETRY:
4938 b->shader->info.gs.vertices_out = mode->operands[0];
4939 break;
4940 case MESA_SHADER_MESH:
4941 b->shader->info.mesh.max_vertices_out = mode->operands[0];
4942 break;
4943 default:
4944 vtn_fail("Execution mode OutputVertices not supported in stage %s",
4945 _mesa_shader_stage_to_string(b->shader->info.stage));
4946 break;
4947 }
4948 break;
4949
4950 case SpvExecutionModeInputPoints:
4951 case SpvExecutionModeInputLines:
4952 case SpvExecutionModeInputLinesAdjacency:
4953 case SpvExecutionModeTriangles:
4954 case SpvExecutionModeInputTrianglesAdjacency:
4955 case SpvExecutionModeQuads:
4956 case SpvExecutionModeIsolines:
4957 if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
4958 b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
4959 b->shader->info.tess.primitive_mode =
4960 gl_primitive_from_spv_execution_mode(b, mode->exec_mode);
4961 } else {
4962 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
4963 b->shader->info.gs.vertices_in =
4964 vertices_in_from_spv_execution_mode(b, mode->exec_mode);
4965 b->shader->info.gs.input_primitive =
4966 gl_primitive_from_spv_execution_mode(b, mode->exec_mode);
4967 }
4968 break;
4969
4970 case SpvExecutionModeOutputPrimitivesNV:
4971 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
4972 b->shader->info.mesh.max_primitives_out = mode->operands[0];
4973 break;
4974
4975 case SpvExecutionModeOutputLinesNV:
4976 case SpvExecutionModeOutputTrianglesNV:
4977 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
4978 b->shader->info.mesh.primitive_type =
4979 gl_primitive_from_spv_execution_mode(b, mode->exec_mode);
4980 break;
4981
4982 case SpvExecutionModeOutputPoints: {
4983 const unsigned primitive =
4984 gl_primitive_from_spv_execution_mode(b, mode->exec_mode);
4985
4986 switch (b->shader->info.stage) {
4987 case MESA_SHADER_GEOMETRY:
4988 b->shader->info.gs.output_primitive = primitive;
4989 break;
4990 case MESA_SHADER_MESH:
4991 b->shader->info.mesh.primitive_type = primitive;
4992 break;
4993 default:
4994 vtn_fail("Execution mode OutputPoints not supported in stage %s",
4995 _mesa_shader_stage_to_string(b->shader->info.stage));
4996 break;
4997 }
4998 break;
4999 }
5000
5001 case SpvExecutionModeOutputLineStrip:
5002 case SpvExecutionModeOutputTriangleStrip:
5003 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5004 b->shader->info.gs.output_primitive =
5005 gl_primitive_from_spv_execution_mode(b, mode->exec_mode);
5006 break;
5007
5008 case SpvExecutionModeSpacingEqual:
5009 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5010 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5011 b->shader->info.tess.spacing = TESS_SPACING_EQUAL;
5012 break;
5013 case SpvExecutionModeSpacingFractionalEven:
5014 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5015 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5016 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_EVEN;
5017 break;
5018 case SpvExecutionModeSpacingFractionalOdd:
5019 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5020 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5021 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_ODD;
5022 break;
5023 case SpvExecutionModeVertexOrderCw:
5024 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5025 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5026 b->shader->info.tess.ccw = false;
5027 break;
5028 case SpvExecutionModeVertexOrderCcw:
5029 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5030 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5031 b->shader->info.tess.ccw = true;
5032 break;
5033 case SpvExecutionModePointMode:
5034 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5035 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5036 b->shader->info.tess.point_mode = true;
5037 break;
5038
5039 case SpvExecutionModePixelCenterInteger:
5040 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5041 b->shader->info.fs.pixel_center_integer = true;
5042 break;
5043
5044 case SpvExecutionModeXfb:
5045 b->shader->info.has_transform_feedback_varyings = true;
5046 break;
5047
5048 case SpvExecutionModeVecTypeHint:
5049 break; /* OpenCL */
5050
5051 case SpvExecutionModeContractionOff:
5052 if (b->shader->info.stage != MESA_SHADER_KERNEL)
5053 vtn_warn("ExectionMode only allowed for CL-style kernels: %s",
5054 spirv_executionmode_to_string(mode->exec_mode));
5055 else
5056 b->exact = true;
5057 break;
5058
5059 case SpvExecutionModeStencilRefReplacingEXT:
5060 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5061 break;
5062
5063 case SpvExecutionModeDerivativeGroupQuadsNV:
5064 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5065 b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_QUADS;
5066 break;
5067
5068 case SpvExecutionModeDerivativeGroupLinearNV:
5069 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5070 b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_LINEAR;
5071 break;
5072
5073 case SpvExecutionModePixelInterlockOrderedEXT:
5074 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5075 b->shader->info.fs.pixel_interlock_ordered = true;
5076 break;
5077
5078 case SpvExecutionModePixelInterlockUnorderedEXT:
5079 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5080 b->shader->info.fs.pixel_interlock_unordered = true;
5081 break;
5082
5083 case SpvExecutionModeSampleInterlockOrderedEXT:
5084 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5085 b->shader->info.fs.sample_interlock_ordered = true;
5086 break;
5087
5088 case SpvExecutionModeSampleInterlockUnorderedEXT:
5089 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5090 b->shader->info.fs.sample_interlock_unordered = true;
5091 break;
5092
5093 case SpvExecutionModeDenormPreserve:
5094 case SpvExecutionModeDenormFlushToZero:
5095 case SpvExecutionModeSignedZeroInfNanPreserve:
5096 case SpvExecutionModeRoundingModeRTE:
5097 case SpvExecutionModeRoundingModeRTZ: {
5098 unsigned execution_mode = 0;
5099 switch (mode->exec_mode) {
5100 case SpvExecutionModeDenormPreserve:
5101 switch (mode->operands[0]) {
5102 case 16: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP16; break;
5103 case 32: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP32; break;
5104 case 64: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP64; break;
5105 default: vtn_fail("Floating point type not supported");
5106 }
5107 break;
5108 case SpvExecutionModeDenormFlushToZero:
5109 switch (mode->operands[0]) {
5110 case 16: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16; break;
5111 case 32: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32; break;
5112 case 64: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64; break;
5113 default: vtn_fail("Floating point type not supported");
5114 }
5115 break;
5116 case SpvExecutionModeSignedZeroInfNanPreserve:
5117 switch (mode->operands[0]) {
5118 case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break;
5119 case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break;
5120 case 64: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break;
5121 default: vtn_fail("Floating point type not supported");
5122 }
5123 break;
5124 case SpvExecutionModeRoundingModeRTE:
5125 switch (mode->operands[0]) {
5126 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16; break;
5127 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32; break;
5128 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64; break;
5129 default: vtn_fail("Floating point type not supported");
5130 }
5131 break;
5132 case SpvExecutionModeRoundingModeRTZ:
5133 switch (mode->operands[0]) {
5134 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16; break;
5135 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32; break;
5136 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; break;
5137 default: vtn_fail("Floating point type not supported");
5138 }
5139 break;
5140 default:
5141 break;
5142 }
5143
5144 b->shader->info.float_controls_execution_mode |= execution_mode;
5145
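      /* Reject contradictory float-controls requests: a given bit size cannot
       * both flush denorms to zero and preserve them, nor use both RTE and
       * RTZ rounding.
       */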
5146 for (unsigned bit_size = 16; bit_size <= 64; bit_size *= 2) {
5147 vtn_fail_if(nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, bit_size) &&
5148 nir_is_denorm_preserve(b->shader->info.float_controls_execution_mode, bit_size),
5149 "Cannot flush to zero and preserve denorms for the same bit size.");
5150 vtn_fail_if(nir_is_rounding_mode_rtne(b->shader->info.float_controls_execution_mode, bit_size) &&
5151 nir_is_rounding_mode_rtz(b->shader->info.float_controls_execution_mode, bit_size),
5152 "Cannot set rounding mode to RTNE and RTZ for the same bit size.");
5153 }
5154 break;
5155 }
5156
5157 case SpvExecutionModeLocalSizeId:
5158 case SpvExecutionModeLocalSizeHintId:
5159 /* Handled later by vtn_handle_execution_mode_id(). */
5160 break;
5161
5162 case SpvExecutionModeSubgroupSize:
5163 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5164 b->shader->info.cs.subgroup_size = mode->operands[0];
5165 break;
5166
5167 case SpvExecutionModeSubgroupUniformControlFlowKHR:
5168 /* There's no corresponding SPIR-V capability, so check here. */
5169 vtn_fail_if(!b->options->caps.subgroup_uniform_control_flow,
5170 "SpvExecutionModeSubgroupUniformControlFlowKHR not supported.");
5171 break;
5172
5173 default:
5174 vtn_fail("Unhandled execution mode: %s (%u)",
5175 spirv_executionmode_to_string(mode->exec_mode),
5176 mode->exec_mode);
5177 }
5178 }
5179
5180 static void
5181 vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_point,
5182 const struct vtn_decoration *mode, UNUSED void *data)
5183 {
5184
5185 vtn_assert(b->entry_point == entry_point);
5186
5187 switch (mode->exec_mode) {
5188 case SpvExecutionModeLocalSizeId:
5189 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5190 b->shader->info.workgroup_size[0] = vtn_constant_uint(b, mode->operands[0]);
5191 b->shader->info.workgroup_size[1] = vtn_constant_uint(b, mode->operands[1]);
5192 b->shader->info.workgroup_size[2] = vtn_constant_uint(b, mode->operands[2]);
5193 } else {
5194 vtn_fail("Execution mode LocalSizeId not supported in stage %s",
5195 _mesa_shader_stage_to_string(b->shader->info.stage));
5196 }
5197 break;
5198
5199 case SpvExecutionModeLocalSizeHintId:
5200 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5201 b->shader->info.cs.workgroup_size_hint[0] = vtn_constant_uint(b, mode->operands[0]);
5202 b->shader->info.cs.workgroup_size_hint[1] = vtn_constant_uint(b, mode->operands[1]);
5203 b->shader->info.cs.workgroup_size_hint[2] = vtn_constant_uint(b, mode->operands[2]);
5204 break;
5205
5206 default:
5207 /* Nothing to do. Literal execution modes already handled by
5208 * vtn_handle_execution_mode(). */
5209 break;
5210 }
5211 }
5212
5213 static bool
5214 vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
5215 const uint32_t *w, unsigned count)
5216 {
5217 vtn_set_instruction_result_type(b, opcode, w, count);
5218
5219 switch (opcode) {
5220 case SpvOpSource:
5221 case SpvOpSourceContinued:
5222 case SpvOpSourceExtension:
5223 case SpvOpExtension:
5224 case SpvOpCapability:
5225 case SpvOpExtInstImport:
5226 case SpvOpMemoryModel:
5227 case SpvOpEntryPoint:
5228 case SpvOpExecutionMode:
5229 case SpvOpString:
5230 case SpvOpName:
5231 case SpvOpMemberName:
5232 case SpvOpDecorationGroup:
5233 case SpvOpDecorate:
5234 case SpvOpDecorateId:
5235 case SpvOpMemberDecorate:
5236 case SpvOpGroupDecorate:
5237 case SpvOpGroupMemberDecorate:
5238 case SpvOpDecorateString:
5239 case SpvOpMemberDecorateString:
5240 vtn_fail("Invalid opcode types and variables section");
5241 break;
5242
5243 case SpvOpTypeVoid:
5244 case SpvOpTypeBool:
5245 case SpvOpTypeInt:
5246 case SpvOpTypeFloat:
5247 case SpvOpTypeVector:
5248 case SpvOpTypeMatrix:
5249 case SpvOpTypeImage:
5250 case SpvOpTypeSampler:
5251 case SpvOpTypeSampledImage:
5252 case SpvOpTypeArray:
5253 case SpvOpTypeRuntimeArray:
5254 case SpvOpTypeStruct:
5255 case SpvOpTypeOpaque:
5256 case SpvOpTypePointer:
5257 case SpvOpTypeForwardPointer:
5258 case SpvOpTypeFunction:
5259 case SpvOpTypeEvent:
5260 case SpvOpTypeDeviceEvent:
5261 case SpvOpTypeReserveId:
5262 case SpvOpTypeQueue:
5263 case SpvOpTypePipe:
5264 case SpvOpTypeAccelerationStructureKHR:
5265 vtn_handle_type(b, opcode, w, count);
5266 break;
5267
5268 case SpvOpConstantTrue:
5269 case SpvOpConstantFalse:
5270 case SpvOpConstant:
5271 case SpvOpConstantComposite:
5272 case SpvOpConstantNull:
5273 case SpvOpSpecConstantTrue:
5274 case SpvOpSpecConstantFalse:
5275 case SpvOpSpecConstant:
5276 case SpvOpSpecConstantComposite:
5277 case SpvOpSpecConstantOp:
5278 vtn_handle_constant(b, opcode, w, count);
5279 break;
5280
5281 case SpvOpUndef:
5282 case SpvOpVariable:
5283 case SpvOpConstantSampler:
5284 vtn_handle_variables(b, opcode, w, count);
5285 break;
5286
5287 case SpvOpExtInst: {
5288 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5289 /* NonSemantic extended instructions are acceptable in preamble, others
5290 * will indicate the end of preamble.
5291 */
5292 return val->ext_handler == vtn_handle_non_semantic_instruction;
5293 }
5294
5295 default:
5296 return false; /* End of preamble */
5297 }
5298
5299 return true;
5300 }
5301
5302 static struct vtn_ssa_value *
5303 vtn_nir_select(struct vtn_builder *b, struct vtn_ssa_value *src0,
5304 struct vtn_ssa_value *src1, struct vtn_ssa_value *src2)
5305 {
5306 struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value);
5307 dest->type = src1->type;
5308
5309 if (glsl_type_is_vector_or_scalar(src1->type)) {
5310 dest->def = nir_bcsel(&b->nb, src0->def, src1->def, src2->def);
5311 } else {
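      /* src1 and src2 are composites of the same type; recurse per element,
       * reusing the same scalar/vector condition for every element.
       */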
5312 unsigned elems = glsl_get_length(src1->type);
5313
5314 dest->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
5315 for (unsigned i = 0; i < elems; i++) {
5316 dest->elems[i] = vtn_nir_select(b, src0,
5317 src1->elems[i], src2->elems[i]);
5318 }
5319 }
5320
5321 return dest;
5322 }
5323
5324 static void
5325 vtn_handle_select(struct vtn_builder *b, SpvOp opcode,
5326 const uint32_t *w, unsigned count)
5327 {
5328 /* Handle OpSelect up-front here because it needs to be able to handle
5329 * pointers and not just regular vectors and scalars.
5330 */
5331 struct vtn_value *res_val = vtn_untyped_value(b, w[2]);
5332 struct vtn_value *cond_val = vtn_untyped_value(b, w[3]);
5333 struct vtn_value *obj1_val = vtn_untyped_value(b, w[4]);
5334 struct vtn_value *obj2_val = vtn_untyped_value(b, w[5]);
5335
5336 vtn_fail_if(obj1_val->type != res_val->type ||
5337 obj2_val->type != res_val->type,
5338 "Object types must match the result type in OpSelect");
5339
5340 vtn_fail_if((cond_val->type->base_type != vtn_base_type_scalar &&
5341 cond_val->type->base_type != vtn_base_type_vector) ||
5342 !glsl_type_is_boolean(cond_val->type->type),
5343 "OpSelect must have either a vector of booleans or "
5344 "a boolean as Condition type");
5345
5346 vtn_fail_if(cond_val->type->base_type == vtn_base_type_vector &&
5347 (res_val->type->base_type != vtn_base_type_vector ||
5348 res_val->type->length != cond_val->type->length),
5349 "When Condition type in OpSelect is a vector, the Result "
5350 "type must be a vector of the same length");
5351
5352 switch (res_val->type->base_type) {
5353 case vtn_base_type_scalar:
5354 case vtn_base_type_vector:
5355 case vtn_base_type_matrix:
5356 case vtn_base_type_array:
5357 case vtn_base_type_struct:
5358 /* OK. */
5359 break;
5360 case vtn_base_type_pointer:
5361 /* We need to have actual storage for pointer types. */
5362 vtn_fail_if(res_val->type->type == NULL,
5363 "Invalid pointer result type for OpSelect");
5364 break;
5365 default:
5366 vtn_fail("Result type of OpSelect must be a scalar, composite, or pointer");
5367 }
5368
5369 vtn_push_ssa_value(b, w[2],
5370 vtn_nir_select(b, vtn_ssa_value(b, w[3]),
5371 vtn_ssa_value(b, w[4]),
5372 vtn_ssa_value(b, w[5])));
5373 }
5374
5375 static void
5376 vtn_handle_ptr(struct vtn_builder *b, SpvOp opcode,
5377 const uint32_t *w, unsigned count)
5378 {
5379 struct vtn_type *type1 = vtn_get_value_type(b, w[3]);
5380 struct vtn_type *type2 = vtn_get_value_type(b, w[4]);
5381 vtn_fail_if(type1->base_type != vtn_base_type_pointer ||
5382 type2->base_type != vtn_base_type_pointer,
5383 "%s operands must have pointer types",
5384 spirv_op_to_string(opcode));
5385 vtn_fail_if(type1->storage_class != type2->storage_class,
5386 "%s operands must have the same storage class",
5387 spirv_op_to_string(opcode));
5388
5389 struct vtn_type *vtn_type = vtn_get_type(b, w[1]);
5390 const struct glsl_type *type = vtn_type->type;
5391
5392 nir_address_format addr_format = vtn_mode_to_address_format(
5393 b, vtn_storage_class_to_mode(b, type1->storage_class, NULL, NULL));
5394
5395 nir_ssa_def *def;
5396
5397 switch (opcode) {
5398 case SpvOpPtrDiff: {
5399 /* OpPtrDiff returns the difference in number of elements (not byte offset). */
5400 unsigned elem_size, elem_align;
5401 glsl_get_natural_size_align_bytes(type1->deref->type,
5402 &elem_size, &elem_align);
5403
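      /* The address subtraction below yields a byte difference; dividing by
       * the element's natural size gives the element count OpPtrDiff expects
       * (e.g. a 48-byte difference over 16-byte elements is 3), which is then
       * converted to the result type's bit size.
       */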
5404 def = nir_build_addr_isub(&b->nb,
5405 vtn_get_nir_ssa(b, w[3]),
5406 vtn_get_nir_ssa(b, w[4]),
5407 addr_format);
5408 def = nir_idiv(&b->nb, def, nir_imm_intN_t(&b->nb, elem_size, def->bit_size));
5409 def = nir_i2i(&b->nb, def, glsl_get_bit_size(type));
5410 break;
5411 }
5412
5413 case SpvOpPtrEqual:
5414 case SpvOpPtrNotEqual: {
5415 def = nir_build_addr_ieq(&b->nb,
5416 vtn_get_nir_ssa(b, w[3]),
5417 vtn_get_nir_ssa(b, w[4]),
5418 addr_format);
5419 if (opcode == SpvOpPtrNotEqual)
5420 def = nir_inot(&b->nb, def);
5421 break;
5422 }
5423
5424 default:
5425 unreachable("Invalid ptr operation");
5426 }
5427
5428 vtn_push_nir_ssa(b, w[2], def);
5429 }
5430
5431 static void
5432 vtn_handle_ray_intrinsic(struct vtn_builder *b, SpvOp opcode,
5433 const uint32_t *w, unsigned count)
5434 {
5435 nir_intrinsic_instr *intrin;
5436
5437 switch (opcode) {
5438 case SpvOpTraceNV:
5439 case SpvOpTraceRayKHR: {
5440 intrin = nir_intrinsic_instr_create(b->nb.shader,
5441 nir_intrinsic_trace_ray);
5442
5443 /* The sources are in the same order in the NIR intrinsic */
5444 for (unsigned i = 0; i < 10; i++)
5445 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
5446
5447 nir_deref_instr *payload;
5448 if (opcode == SpvOpTraceNV)
5449 payload = vtn_get_call_payload_for_location(b, w[11]);
5450 else
5451 payload = vtn_nir_deref(b, w[11]);
5452 intrin->src[10] = nir_src_for_ssa(&payload->dest.ssa);
5453 nir_builder_instr_insert(&b->nb, &intrin->instr);
5454 break;
5455 }
5456
5457 case SpvOpReportIntersectionKHR: {
5458 intrin = nir_intrinsic_instr_create(b->nb.shader,
5459 nir_intrinsic_report_ray_intersection);
5460 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
5461 intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
5462 nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 1, NULL);
5463 nir_builder_instr_insert(&b->nb, &intrin->instr);
5464 vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa);
5465 break;
5466 }
5467
5468 case SpvOpIgnoreIntersectionNV:
5469 intrin = nir_intrinsic_instr_create(b->nb.shader,
5470 nir_intrinsic_ignore_ray_intersection);
5471 nir_builder_instr_insert(&b->nb, &intrin->instr);
5472 break;
5473
5474 case SpvOpTerminateRayNV:
5475 intrin = nir_intrinsic_instr_create(b->nb.shader,
5476 nir_intrinsic_terminate_ray);
5477 nir_builder_instr_insert(&b->nb, &intrin->instr);
5478 break;
5479
5480 case SpvOpExecuteCallableNV:
5481 case SpvOpExecuteCallableKHR: {
5482 intrin = nir_intrinsic_instr_create(b->nb.shader,
5483 nir_intrinsic_execute_callable);
5484 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
5485 nir_deref_instr *payload;
5486 if (opcode == SpvOpExecuteCallableNV)
5487 payload = vtn_get_call_payload_for_location(b, w[2]);
5488 else
5489 payload = vtn_nir_deref(b, w[2]);
5490 intrin->src[1] = nir_src_for_ssa(&payload->dest.ssa);
5491 nir_builder_instr_insert(&b->nb, &intrin->instr);
5492 break;
5493 }
5494
5495 default:
5496 vtn_fail_with_opcode("Unhandled opcode", opcode);
5497 }
5498 }
5499
5500 static void
5501 vtn_handle_write_packed_primitive_indices(struct vtn_builder *b, SpvOp opcode,
5502 const uint32_t *w, unsigned count)
5503 {
5504 vtn_assert(opcode == SpvOpWritePackedPrimitiveIndices4x8NV);
5505
5506    /* TODO(mesh): Use or create a primitive that allows the unpacking to
5507 * happen in the backend. What we have here is functional but too
5508 * blunt.
5509 */
5510
5511 struct vtn_type *offset_type = vtn_get_value_type(b, w[1]);
5512 vtn_fail_if(offset_type->base_type != vtn_base_type_scalar ||
5513 offset_type->type != glsl_uint_type(),
5514 "Index Offset type of OpWritePackedPrimitiveIndices4x8NV "
5515 "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
5516
5517 struct vtn_type *packed_type = vtn_get_value_type(b, w[2]);
5518 vtn_fail_if(packed_type->base_type != vtn_base_type_scalar ||
5519 packed_type->type != glsl_uint_type(),
5520 "Packed Indices type of OpWritePackedPrimitiveIndices4x8NV "
5521 "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
5522
5523 nir_deref_instr *indices = NULL;
5524 nir_foreach_variable_with_modes(var, b->nb.shader, nir_var_shader_out) {
5525 if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) {
5526 indices = nir_build_deref_var(&b->nb, var);
5527 break;
5528 }
5529 }
5530
5531 /* TODO(mesh): It may be the case that the variable is not present in the
5532 * entry point interface list.
5533 *
5534 * See https://github.com/KhronosGroup/SPIRV-Registry/issues/104.
5535 */
5536 vtn_fail_if(indices == NULL,
5537 "Missing output variable decorated with PrimitiveIndices builtin.");
5538
5539 nir_ssa_def *offset = vtn_get_nir_ssa(b, w[1]);
5540 nir_ssa_def *packed = vtn_get_nir_ssa(b, w[2]);
5541 nir_ssa_def *unpacked = nir_unpack_bits(&b->nb, packed, 8);
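   /* 'unpacked' is a 4-component vector of 8-bit values; each byte is widened
    * to 32 bits and stored as a separate index at indices[offset + i].
    */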
5542 for (int i = 0; i < 4; i++) {
5543 nir_deref_instr *offset_deref =
5544 nir_build_deref_array(&b->nb, indices,
5545 nir_iadd_imm(&b->nb, offset, i));
5546 nir_ssa_def *val = nir_u2u(&b->nb, nir_channel(&b->nb, unpacked, i), 32);
5547
5548 nir_store_deref(&b->nb, offset_deref, val, 0x1);
5549 }
5550 }
5551
5552 static bool
5553 vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
5554 const uint32_t *w, unsigned count)
5555 {
5556 switch (opcode) {
5557 case SpvOpLabel:
5558 break;
5559
5560 case SpvOpLoopMerge:
5561 case SpvOpSelectionMerge:
5562 /* This is handled by cfg pre-pass and walk_blocks */
5563 break;
5564
5565 case SpvOpUndef: {
5566 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
5567 val->type = vtn_get_type(b, w[1]);
5568 break;
5569 }
5570
5571 case SpvOpExtInst:
5572 vtn_handle_extension(b, opcode, w, count);
5573 break;
5574
5575 case SpvOpVariable:
5576 case SpvOpLoad:
5577 case SpvOpStore:
5578 case SpvOpCopyMemory:
5579 case SpvOpCopyMemorySized:
5580 case SpvOpAccessChain:
5581 case SpvOpPtrAccessChain:
5582 case SpvOpInBoundsAccessChain:
5583 case SpvOpInBoundsPtrAccessChain:
5584 case SpvOpArrayLength:
5585 case SpvOpConvertPtrToU:
5586 case SpvOpConvertUToPtr:
5587 case SpvOpGenericCastToPtrExplicit:
5588 case SpvOpGenericPtrMemSemantics:
5589 case SpvOpSubgroupBlockReadINTEL:
5590 case SpvOpSubgroupBlockWriteINTEL:
5591 case SpvOpConvertUToAccelerationStructureKHR:
5592 vtn_handle_variables(b, opcode, w, count);
5593 break;
5594
5595 case SpvOpFunctionCall:
5596 vtn_handle_function_call(b, opcode, w, count);
5597 break;
5598
5599 case SpvOpSampledImage:
5600 case SpvOpImage:
5601 case SpvOpImageSparseTexelsResident:
5602 case SpvOpImageSampleImplicitLod:
5603 case SpvOpImageSparseSampleImplicitLod:
5604 case SpvOpImageSampleExplicitLod:
5605 case SpvOpImageSparseSampleExplicitLod:
5606 case SpvOpImageSampleDrefImplicitLod:
5607 case SpvOpImageSparseSampleDrefImplicitLod:
5608 case SpvOpImageSampleDrefExplicitLod:
5609 case SpvOpImageSparseSampleDrefExplicitLod:
5610 case SpvOpImageSampleProjImplicitLod:
5611 case SpvOpImageSampleProjExplicitLod:
5612 case SpvOpImageSampleProjDrefImplicitLod:
5613 case SpvOpImageSampleProjDrefExplicitLod:
5614 case SpvOpImageFetch:
5615 case SpvOpImageSparseFetch:
5616 case SpvOpImageGather:
5617 case SpvOpImageSparseGather:
5618 case SpvOpImageDrefGather:
5619 case SpvOpImageSparseDrefGather:
5620 case SpvOpImageQueryLod:
5621 case SpvOpImageQueryLevels:
5622 vtn_handle_texture(b, opcode, w, count);
5623 break;
5624
5625 case SpvOpImageRead:
5626 case SpvOpImageSparseRead:
5627 case SpvOpImageWrite:
5628 case SpvOpImageTexelPointer:
5629 case SpvOpImageQueryFormat:
5630 case SpvOpImageQueryOrder:
5631 vtn_handle_image(b, opcode, w, count);
5632 break;
5633
5634 case SpvOpImageQuerySamples:
5635 case SpvOpImageQuerySizeLod:
5636 case SpvOpImageQuerySize: {
5637 struct vtn_type *image_type = vtn_get_value_type(b, w[3]);
5638 vtn_assert(image_type->base_type == vtn_base_type_image);
5639 if (glsl_type_is_image(image_type->glsl_image)) {
5640 vtn_handle_image(b, opcode, w, count);
5641 } else {
5642 vtn_assert(glsl_type_is_sampler(image_type->glsl_image));
5643 vtn_handle_texture(b, opcode, w, count);
5644 }
5645 break;
5646 }
5647
5648 case SpvOpFragmentMaskFetchAMD:
5649 case SpvOpFragmentFetchAMD:
5650 vtn_handle_texture(b, opcode, w, count);
5651 break;
5652
5653 case SpvOpAtomicLoad:
5654 case SpvOpAtomicExchange:
5655 case SpvOpAtomicCompareExchange:
5656 case SpvOpAtomicCompareExchangeWeak:
5657 case SpvOpAtomicIIncrement:
5658 case SpvOpAtomicIDecrement:
5659 case SpvOpAtomicIAdd:
5660 case SpvOpAtomicISub:
5661 case SpvOpAtomicSMin:
5662 case SpvOpAtomicUMin:
5663 case SpvOpAtomicSMax:
5664 case SpvOpAtomicUMax:
5665 case SpvOpAtomicAnd:
5666 case SpvOpAtomicOr:
5667 case SpvOpAtomicXor:
5668 case SpvOpAtomicFAddEXT:
5669 case SpvOpAtomicFMinEXT:
5670 case SpvOpAtomicFMaxEXT:
5671 case SpvOpAtomicFlagTestAndSet: {
5672 struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
5673 if (pointer->value_type == vtn_value_type_image_pointer) {
5674 vtn_handle_image(b, opcode, w, count);
5675 } else {
5676 vtn_assert(pointer->value_type == vtn_value_type_pointer);
5677 vtn_handle_atomics(b, opcode, w, count);
5678 }
5679 break;
5680 }
5681
5682 case SpvOpAtomicStore:
5683 case SpvOpAtomicFlagClear: {
5684 struct vtn_value *pointer = vtn_untyped_value(b, w[1]);
5685 if (pointer->value_type == vtn_value_type_image_pointer) {
5686 vtn_handle_image(b, opcode, w, count);
5687 } else {
5688 vtn_assert(pointer->value_type == vtn_value_type_pointer);
5689 vtn_handle_atomics(b, opcode, w, count);
5690 }
5691 break;
5692 }
5693
5694 case SpvOpSelect:
5695 vtn_handle_select(b, opcode, w, count);
5696 break;
5697
5698 case SpvOpSNegate:
5699 case SpvOpFNegate:
5700 case SpvOpNot:
5701 case SpvOpAny:
5702 case SpvOpAll:
5703 case SpvOpConvertFToU:
5704 case SpvOpConvertFToS:
5705 case SpvOpConvertSToF:
5706 case SpvOpConvertUToF:
5707 case SpvOpUConvert:
5708 case SpvOpSConvert:
5709 case SpvOpFConvert:
5710 case SpvOpQuantizeToF16:
5711 case SpvOpSatConvertSToU:
5712 case SpvOpSatConvertUToS:
5713 case SpvOpPtrCastToGeneric:
5714 case SpvOpGenericCastToPtr:
5715 case SpvOpIsNan:
5716 case SpvOpIsInf:
5717 case SpvOpIsFinite:
5718 case SpvOpIsNormal:
5719 case SpvOpSignBitSet:
5720 case SpvOpLessOrGreater:
5721 case SpvOpOrdered:
5722 case SpvOpUnordered:
5723 case SpvOpIAdd:
5724 case SpvOpFAdd:
5725 case SpvOpISub:
5726 case SpvOpFSub:
5727 case SpvOpIMul:
5728 case SpvOpFMul:
5729 case SpvOpUDiv:
5730 case SpvOpSDiv:
5731 case SpvOpFDiv:
5732 case SpvOpUMod:
5733 case SpvOpSRem:
5734 case SpvOpSMod:
5735 case SpvOpFRem:
5736 case SpvOpFMod:
5737 case SpvOpVectorTimesScalar:
5738 case SpvOpDot:
5739 case SpvOpIAddCarry:
5740 case SpvOpISubBorrow:
5741 case SpvOpUMulExtended:
5742 case SpvOpSMulExtended:
5743 case SpvOpShiftRightLogical:
5744 case SpvOpShiftRightArithmetic:
5745 case SpvOpShiftLeftLogical:
5746 case SpvOpLogicalEqual:
5747 case SpvOpLogicalNotEqual:
5748 case SpvOpLogicalOr:
5749 case SpvOpLogicalAnd:
5750 case SpvOpLogicalNot:
5751 case SpvOpBitwiseOr:
5752 case SpvOpBitwiseXor:
5753 case SpvOpBitwiseAnd:
5754 case SpvOpIEqual:
5755 case SpvOpFOrdEqual:
5756 case SpvOpFUnordEqual:
5757 case SpvOpINotEqual:
5758 case SpvOpFOrdNotEqual:
5759 case SpvOpFUnordNotEqual:
5760 case SpvOpULessThan:
5761 case SpvOpSLessThan:
5762 case SpvOpFOrdLessThan:
5763 case SpvOpFUnordLessThan:
5764 case SpvOpUGreaterThan:
5765 case SpvOpSGreaterThan:
5766 case SpvOpFOrdGreaterThan:
5767 case SpvOpFUnordGreaterThan:
5768 case SpvOpULessThanEqual:
5769 case SpvOpSLessThanEqual:
5770 case SpvOpFOrdLessThanEqual:
5771 case SpvOpFUnordLessThanEqual:
5772 case SpvOpUGreaterThanEqual:
5773 case SpvOpSGreaterThanEqual:
5774 case SpvOpFOrdGreaterThanEqual:
5775 case SpvOpFUnordGreaterThanEqual:
5776 case SpvOpDPdx:
5777 case SpvOpDPdy:
5778 case SpvOpFwidth:
5779 case SpvOpDPdxFine:
5780 case SpvOpDPdyFine:
5781 case SpvOpFwidthFine:
5782 case SpvOpDPdxCoarse:
5783 case SpvOpDPdyCoarse:
5784 case SpvOpFwidthCoarse:
5785 case SpvOpBitFieldInsert:
5786 case SpvOpBitFieldSExtract:
5787 case SpvOpBitFieldUExtract:
5788 case SpvOpBitReverse:
5789 case SpvOpBitCount:
5790 case SpvOpTranspose:
5791 case SpvOpOuterProduct:
5792 case SpvOpMatrixTimesScalar:
5793 case SpvOpVectorTimesMatrix:
5794 case SpvOpMatrixTimesVector:
5795 case SpvOpMatrixTimesMatrix:
5796 case SpvOpUCountLeadingZerosINTEL:
5797 case SpvOpUCountTrailingZerosINTEL:
5798 case SpvOpAbsISubINTEL:
5799 case SpvOpAbsUSubINTEL:
5800 case SpvOpIAddSatINTEL:
5801 case SpvOpUAddSatINTEL:
5802 case SpvOpIAverageINTEL:
5803 case SpvOpUAverageINTEL:
5804 case SpvOpIAverageRoundedINTEL:
5805 case SpvOpUAverageRoundedINTEL:
5806 case SpvOpISubSatINTEL:
5807 case SpvOpUSubSatINTEL:
5808 case SpvOpIMul32x16INTEL:
5809 case SpvOpUMul32x16INTEL:
5810 vtn_handle_alu(b, opcode, w, count);
5811 break;
5812
5813 case SpvOpSDotKHR:
5814 case SpvOpUDotKHR:
5815 case SpvOpSUDotKHR:
5816 case SpvOpSDotAccSatKHR:
5817 case SpvOpUDotAccSatKHR:
5818 case SpvOpSUDotAccSatKHR:
5819 vtn_handle_integer_dot(b, opcode, w, count);
5820 break;
5821
5822 case SpvOpBitcast:
5823 vtn_handle_bitcast(b, w, count);
5824 break;
5825
5826 case SpvOpVectorExtractDynamic:
5827 case SpvOpVectorInsertDynamic:
5828 case SpvOpVectorShuffle:
5829 case SpvOpCompositeConstruct:
5830 case SpvOpCompositeExtract:
5831 case SpvOpCompositeInsert:
5832 case SpvOpCopyLogical:
5833 case SpvOpCopyObject:
5834 vtn_handle_composite(b, opcode, w, count);
5835 break;
5836
5837 case SpvOpEmitVertex:
5838 case SpvOpEndPrimitive:
5839 case SpvOpEmitStreamVertex:
5840 case SpvOpEndStreamPrimitive:
5841 case SpvOpControlBarrier:
5842 case SpvOpMemoryBarrier:
5843 vtn_handle_barrier(b, opcode, w, count);
5844 break;
5845
5846 case SpvOpGroupNonUniformElect:
5847 case SpvOpGroupNonUniformAll:
5848 case SpvOpGroupNonUniformAny:
5849 case SpvOpGroupNonUniformAllEqual:
5850 case SpvOpGroupNonUniformBroadcast:
5851 case SpvOpGroupNonUniformBroadcastFirst:
5852 case SpvOpGroupNonUniformBallot:
5853 case SpvOpGroupNonUniformInverseBallot:
5854 case SpvOpGroupNonUniformBallotBitExtract:
5855 case SpvOpGroupNonUniformBallotBitCount:
5856 case SpvOpGroupNonUniformBallotFindLSB:
5857 case SpvOpGroupNonUniformBallotFindMSB:
5858 case SpvOpGroupNonUniformShuffle:
5859 case SpvOpGroupNonUniformShuffleXor:
5860 case SpvOpGroupNonUniformShuffleUp:
5861 case SpvOpGroupNonUniformShuffleDown:
5862 case SpvOpGroupNonUniformIAdd:
5863 case SpvOpGroupNonUniformFAdd:
5864 case SpvOpGroupNonUniformIMul:
5865 case SpvOpGroupNonUniformFMul:
5866 case SpvOpGroupNonUniformSMin:
5867 case SpvOpGroupNonUniformUMin:
5868 case SpvOpGroupNonUniformFMin:
5869 case SpvOpGroupNonUniformSMax:
5870 case SpvOpGroupNonUniformUMax:
5871 case SpvOpGroupNonUniformFMax:
5872 case SpvOpGroupNonUniformBitwiseAnd:
5873 case SpvOpGroupNonUniformBitwiseOr:
5874 case SpvOpGroupNonUniformBitwiseXor:
5875 case SpvOpGroupNonUniformLogicalAnd:
5876 case SpvOpGroupNonUniformLogicalOr:
5877 case SpvOpGroupNonUniformLogicalXor:
5878 case SpvOpGroupNonUniformQuadBroadcast:
5879 case SpvOpGroupNonUniformQuadSwap:
5880 case SpvOpGroupAll:
5881 case SpvOpGroupAny:
5882 case SpvOpGroupBroadcast:
5883 case SpvOpGroupIAdd:
5884 case SpvOpGroupFAdd:
5885 case SpvOpGroupFMin:
5886 case SpvOpGroupUMin:
5887 case SpvOpGroupSMin:
5888 case SpvOpGroupFMax:
5889 case SpvOpGroupUMax:
5890 case SpvOpGroupSMax:
5891 case SpvOpSubgroupBallotKHR:
5892 case SpvOpSubgroupFirstInvocationKHR:
5893 case SpvOpSubgroupReadInvocationKHR:
5894 case SpvOpSubgroupAllKHR:
5895 case SpvOpSubgroupAnyKHR:
5896 case SpvOpSubgroupAllEqualKHR:
5897 case SpvOpGroupIAddNonUniformAMD:
5898 case SpvOpGroupFAddNonUniformAMD:
5899 case SpvOpGroupFMinNonUniformAMD:
5900 case SpvOpGroupUMinNonUniformAMD:
5901 case SpvOpGroupSMinNonUniformAMD:
5902 case SpvOpGroupFMaxNonUniformAMD:
5903 case SpvOpGroupUMaxNonUniformAMD:
5904 case SpvOpGroupSMaxNonUniformAMD:
5905 case SpvOpSubgroupShuffleINTEL:
5906 case SpvOpSubgroupShuffleDownINTEL:
5907 case SpvOpSubgroupShuffleUpINTEL:
5908 case SpvOpSubgroupShuffleXorINTEL:
5909 vtn_handle_subgroup(b, opcode, w, count);
5910 break;
5911
5912 case SpvOpPtrDiff:
5913 case SpvOpPtrEqual:
5914 case SpvOpPtrNotEqual:
5915 vtn_handle_ptr(b, opcode, w, count);
5916 break;
5917
5918 case SpvOpBeginInvocationInterlockEXT:
5919 nir_begin_invocation_interlock(&b->nb);
5920 break;
5921
5922 case SpvOpEndInvocationInterlockEXT:
5923 nir_end_invocation_interlock(&b->nb);
5924 break;
5925
5926 case SpvOpDemoteToHelperInvocationEXT: {
5927 nir_demote(&b->nb);
5928 break;
5929 }
5930
5931 case SpvOpIsHelperInvocationEXT: {
5932 vtn_push_nir_ssa(b, w[2], nir_is_helper_invocation(&b->nb, 1));
5933 break;
5934 }
5935
5936 case SpvOpReadClockKHR: {
5937 SpvScope scope = vtn_constant_uint(b, w[3]);
5938 nir_scope nir_scope;
5939
5940 switch (scope) {
5941 case SpvScopeDevice:
5942 nir_scope = NIR_SCOPE_DEVICE;
5943 break;
5944 case SpvScopeSubgroup:
5945 nir_scope = NIR_SCOPE_SUBGROUP;
5946 break;
5947 default:
5948 vtn_fail("invalid read clock scope");
5949 }
5950
5951 /* Operation supports two result types: uvec2 and uint64_t. The NIR
5952 * intrinsic gives uvec2, so pack the result for the other case.
5953 */
5954 nir_ssa_def *result = nir_shader_clock(&b->nb, nir_scope);
5955
5956 struct vtn_type *type = vtn_get_type(b, w[1]);
5957 const struct glsl_type *dest_type = type->type;
5958
5959 if (glsl_type_is_vector(dest_type)) {
5960 assert(dest_type == glsl_vector_type(GLSL_TYPE_UINT, 2));
5961 } else {
5962 assert(glsl_type_is_scalar(dest_type));
5963 assert(glsl_get_base_type(dest_type) == GLSL_TYPE_UINT64);
5964 result = nir_pack_64_2x32(&b->nb, result);
5965 }
5966
5967 vtn_push_nir_ssa(b, w[2], result);
5968 break;
5969 }
5970
5971 case SpvOpTraceNV:
5972 case SpvOpTraceRayKHR:
5973 case SpvOpReportIntersectionKHR:
5974 case SpvOpIgnoreIntersectionNV:
5975 case SpvOpTerminateRayNV:
5976 case SpvOpExecuteCallableNV:
5977 case SpvOpExecuteCallableKHR:
5978 vtn_handle_ray_intrinsic(b, opcode, w, count);
5979 break;
5980
5981 case SpvOpLifetimeStart:
5982 case SpvOpLifetimeStop:
5983 break;
5984
5985 case SpvOpGroupAsyncCopy:
5986 case SpvOpGroupWaitEvents:
5987 vtn_handle_opencl_core_instruction(b, opcode, w, count);
5988 break;
5989
5990 case SpvOpWritePackedPrimitiveIndices4x8NV:
5991 vtn_handle_write_packed_primitive_indices(b, opcode, w, count);
5992 break;
5993
5994 default:
5995 vtn_fail_with_opcode("Unhandled opcode", opcode);
5996 }
5997
5998 return true;
5999 }
6000
6001 struct vtn_builder*
6002 vtn_create_builder(const uint32_t *words, size_t word_count,
6003 gl_shader_stage stage, const char *entry_point_name,
6004 const struct spirv_to_nir_options *options)
6005 {
6006 /* Initialize the vtn_builder object */
6007 struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
6008 struct spirv_to_nir_options *dup_options =
6009 ralloc(b, struct spirv_to_nir_options);
6010 *dup_options = *options;
6011
6012 b->spirv = words;
6013 b->spirv_word_count = word_count;
6014 b->file = NULL;
6015 b->line = -1;
6016 b->col = -1;
6017 list_inithead(&b->functions);
6018 b->entry_point_stage = stage;
6019 b->entry_point_name = entry_point_name;
6020 b->options = dup_options;
6021
6022 /*
6023 * Handle the SPIR-V header (first 5 dwords).
6024     * Can't use vtn_assert() as the setjmp(3) target isn't initialized yet.
6025 */
6026 if (word_count <= 5)
6027 goto fail;
6028
6029 if (words[0] != SpvMagicNumber) {
6030 vtn_err("words[0] was 0x%x, want 0x%x", words[0], SpvMagicNumber);
6031 goto fail;
6032 }
6033
6034 b->version = words[1];
6035 if (b->version < 0x10000) {
6036 vtn_err("version was 0x%x, want >= 0x10000", b->version);
6037 goto fail;
6038 }
6039
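   /* Word 2 of the SPIR-V header packs the registered generator/tool ID in
    * the high 16 bits and the tool-specific version in the low 16 bits.
    */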
6040 b->generator_id = words[2] >> 16;
6041 uint16_t generator_version = words[2];
6042
6043 /* In GLSLang commit 8297936dd6eb3, their handling of barrier() was fixed
6044 * to provide correct memory semantics on compute shader barrier()
6045 * commands. Prior to that, we need to fix them up ourselves. This
6046 * GLSLang fix caused them to bump to generator version 3.
6047 */
6048 b->wa_glslang_cs_barrier =
6049 (b->generator_id == vtn_generator_glslang_reference_front_end &&
6050 generator_version < 3);
6051
6052 /* Identifying the LLVM-SPIRV translator:
6053 *
6054 * The LLVM-SPIRV translator currently doesn't store any generator ID [1].
6055     * Our use case involving the SPIRV-Tools linker also means we want to check
6056     * for that tool instead. Finally, the SPIRV-Tools linker also stores its
6057 * generator ID in the wrong location [2].
6058 *
6059 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/1223
6060 * [2] : https://github.com/KhronosGroup/SPIRV-Tools/pull/4549
6061 */
6062 const bool is_llvm_spirv_translator =
6063 (b->generator_id == 0 &&
6064 generator_version == vtn_generator_spirv_tools_linker) ||
6065 b->generator_id == vtn_generator_spirv_tools_linker;
6066
6067 /* The LLVM-SPIRV translator generates Undef initializers for _local
6068 * variables [1].
6069 *
6070 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1224
6071 */
6072 b->wa_llvm_spirv_ignore_workgroup_initializer =
6073 b->options->environment == NIR_SPIRV_OPENCL && is_llvm_spirv_translator;
6074
6075 /* words[2] == generator magic */
6076 unsigned value_id_bound = words[3];
6077 if (words[4] != 0) {
6078 vtn_err("words[4] was %u, want 0", words[4]);
6079 goto fail;
6080 }
6081
6082 b->value_id_bound = value_id_bound;
6083 b->values = rzalloc_array(b, struct vtn_value, value_id_bound);
6084
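   /* Before SPIR-V 1.4 the entry-point interface only lists Input and Output
    * variables, so keep a set of globals that may be used indirectly;
    * can_remove() consults it during the dead-variable pass at the end of
    * spirv_to_nir().
    */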
6085 if (b->options->environment == NIR_SPIRV_VULKAN && b->version < 0x10400)
6086 b->vars_used_indirectly = _mesa_pointer_set_create(b);
6087
6088 return b;
6089 fail:
6090 ralloc_free(b);
6091 return NULL;
6092 }
6093
6094 static nir_function *
6095 vtn_emit_kernel_entry_point_wrapper(struct vtn_builder *b,
6096 nir_function *entry_point)
6097 {
6098 vtn_assert(entry_point == b->entry_point->func->nir_func);
6099 vtn_fail_if(!entry_point->name, "entry points are required to have a name");
6100 const char *func_name =
6101 ralloc_asprintf(b->shader, "__wrapped_%s", entry_point->name);
6102
6103 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
6104
6105 nir_function *main_entry_point = nir_function_create(b->shader, func_name);
6106 main_entry_point->impl = nir_function_impl_create(main_entry_point);
6107 nir_builder_init(&b->nb, main_entry_point->impl);
6108 b->nb.cursor = nir_after_cf_list(&main_entry_point->impl->body);
6109 b->func_param_idx = 0;
6110
6111 nir_call_instr *call = nir_call_instr_create(b->nb.shader, entry_point);
6112
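   /* Wrap each kernel argument in a read-only uniform variable: by-value
    * pointer arguments are copied into function temporaries, image and
    * sampler arguments are passed as derefs, and everything else is loaded
    * and passed by value.
    */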
6113 for (unsigned i = 0; i < entry_point->num_params; ++i) {
6114 struct vtn_type *param_type = b->entry_point->func->type->params[i];
6115
6116 /* consider all pointers to function memory to be parameters passed
6117 * by value
6118 */
6119 bool is_by_val = param_type->base_type == vtn_base_type_pointer &&
6120 param_type->storage_class == SpvStorageClassFunction;
6121
6122 /* input variable */
6123 nir_variable *in_var = rzalloc(b->nb.shader, nir_variable);
6124 in_var->data.mode = nir_var_uniform;
6125 in_var->data.read_only = true;
6126 in_var->data.location = i;
6127 if (param_type->base_type == vtn_base_type_image) {
6128 in_var->data.access =
6129 spirv_to_gl_access_qualifier(b, param_type->access_qualifier);
6130 }
6131
6132 if (is_by_val)
6133 in_var->type = param_type->deref->type;
6134 else if (param_type->base_type == vtn_base_type_image)
6135 in_var->type = param_type->glsl_image;
6136 else if (param_type->base_type == vtn_base_type_sampler)
6137 in_var->type = glsl_bare_sampler_type();
6138 else
6139 in_var->type = param_type->type;
6140
6141 nir_shader_add_variable(b->nb.shader, in_var);
6142
6143 /* we have to copy the entire variable into function memory */
6144 if (is_by_val) {
6145 nir_variable *copy_var =
6146 nir_local_variable_create(main_entry_point->impl, in_var->type,
6147 "copy_in");
6148 nir_copy_var(&b->nb, copy_var, in_var);
6149 call->params[i] =
6150 nir_src_for_ssa(&nir_build_deref_var(&b->nb, copy_var)->dest.ssa);
6151 } else if (param_type->base_type == vtn_base_type_image ||
6152 param_type->base_type == vtn_base_type_sampler) {
6153 /* Don't load the var, just pass a deref of it */
6154 call->params[i] = nir_src_for_ssa(&nir_build_deref_var(&b->nb, in_var)->dest.ssa);
6155 } else {
6156 call->params[i] = nir_src_for_ssa(nir_load_var(&b->nb, in_var));
6157 }
6158 }
6159
6160 nir_builder_instr_insert(&b->nb, &call->instr);
6161
6162 return main_entry_point;
6163 }
6164
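/* Callback for nir_remove_dead_variables(): a variable may only be removed
 * if it is not in the set of variables potentially used indirectly by the
 * entry point.
 */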
6165 static bool
6166 can_remove(nir_variable *var, void *data)
6167 {
6168 const struct set *vars_used_indirectly = data;
6169 return !_mesa_set_search(vars_used_indirectly, var);
6170 }
6171
6172 nir_shader *
6173 spirv_to_nir(const uint32_t *words, size_t word_count,
6174 struct nir_spirv_specialization *spec, unsigned num_spec,
6175 gl_shader_stage stage, const char *entry_point_name,
6176 const struct spirv_to_nir_options *options,
6177 const nir_shader_compiler_options *nir_options)
6178
6179 {
6180 const uint32_t *word_end = words + word_count;
6181
6182 struct vtn_builder *b = vtn_create_builder(words, word_count,
6183 stage, entry_point_name,
6184 options);
6185
6186 if (b == NULL)
6187 return NULL;
6188
6189 /* See also _vtn_fail() */
6190 if (vtn_setjmp(b->fail_jump)) {
6191 ralloc_free(b);
6192 return NULL;
6193 }
6194
6195    /* Skip the SPIR-V header, handled in vtn_create_builder() */
6196    words += 5;
6197
6198 b->shader = nir_shader_create(b, stage, nir_options, NULL);
6199 b->shader->info.float_controls_execution_mode = options->float_controls_execution_mode;
6200
6201 /* Handle all the preamble instructions */
6202 words = vtn_foreach_instruction(b, words, word_end,
6203 vtn_handle_preamble_instruction);
6204
6205 /* DirectXShaderCompiler and glslang/shaderc both create OpKill from HLSL's
6206 * discard/clip, which uses demote semantics. DirectXShaderCompiler will use
6207 * demote if the extension is enabled, so we disable this workaround in that
6208 * case.
6209 *
6210 * Related glslang issue: https://github.com/KhronosGroup/glslang/issues/2416
6211 */
6212 bool glslang = b->generator_id == vtn_generator_glslang_reference_front_end ||
6213 b->generator_id == vtn_generator_shaderc_over_glslang;
6214 bool dxsc = b->generator_id == vtn_generator_spiregg;
6215 b->convert_discard_to_demote = ((dxsc && !b->uses_demote_to_helper_invocation) ||
6216 (glslang && b->source_lang == SpvSourceLanguageHLSL)) &&
6217 options->caps.demote_to_helper_invocation;
6218
6219 if (!options->create_library && b->entry_point == NULL) {
6220 vtn_fail("Entry point not found for %s shader \"%s\"",
6221 _mesa_shader_stage_to_string(stage), entry_point_name);
6222 ralloc_free(b);
6223 return NULL;
6224 }
6225
6226 /* Ensure a sane address mode is being used for function temps */
6227 assert(nir_address_format_bit_size(b->options->temp_addr_format) == nir_get_ptr_bitsize(b->shader));
6228 assert(nir_address_format_num_components(b->options->temp_addr_format) == 1);
6229
6230 /* Set shader info defaults */
6231 if (stage == MESA_SHADER_GEOMETRY)
6232 b->shader->info.gs.invocations = 1;
6233
6234 /* Parse execution modes. */
6235 if (!options->create_library)
6236 vtn_foreach_execution_mode(b, b->entry_point,
6237 vtn_handle_execution_mode, NULL);
6238
6239 b->specializations = spec;
6240 b->num_specializations = num_spec;
6241
6242 /* Handle all variable, type, and constant instructions */
6243 words = vtn_foreach_instruction(b, words, word_end,
6244 vtn_handle_variable_or_type_instruction);
6245
6246    /* Parse execution modes that depend on IDs. Must happen after the
6247     * constants have been parsed.
6248 */
6249 if (!options->create_library)
6250 vtn_foreach_execution_mode(b, b->entry_point,
6251 vtn_handle_execution_mode_id, NULL);
6252
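   /* A constant decorated with the WorkgroupSize built-in takes precedence
    * over any LocalSize/LocalSizeId execution mode parsed above.
    */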
6253 if (b->workgroup_size_builtin) {
6254 vtn_assert(gl_shader_stage_uses_workgroup(stage));
6255 vtn_assert(b->workgroup_size_builtin->type->type ==
6256 glsl_vector_type(GLSL_TYPE_UINT, 3));
6257
6258 nir_const_value *const_size =
6259 b->workgroup_size_builtin->constant->values;
6260
6261 b->shader->info.workgroup_size[0] = const_size[0].u32;
6262 b->shader->info.workgroup_size[1] = const_size[1].u32;
6263 b->shader->info.workgroup_size[2] = const_size[2].u32;
6264 }
6265
6266 /* Set types on all vtn_values */
6267 vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);
6268
6269 vtn_build_cfg(b, words, word_end);
6270
6271 if (!options->create_library) {
6272 assert(b->entry_point->value_type == vtn_value_type_function);
6273 b->entry_point->func->referenced = true;
6274 }
6275
6276 bool progress;
6277 do {
6278 progress = false;
6279 vtn_foreach_cf_node(node, &b->functions) {
6280 struct vtn_function *func = vtn_cf_node_as_function(node);
6281 if ((options->create_library || func->referenced) && !func->emitted) {
6282 b->const_table = _mesa_pointer_hash_table_create(b);
6283
6284 vtn_function_emit(b, func, vtn_handle_body_instruction);
6285 progress = true;
6286 }
6287 }
6288 } while (progress);
6289
6290 if (!options->create_library) {
6291 vtn_assert(b->entry_point->value_type == vtn_value_type_function);
6292 nir_function *entry_point = b->entry_point->func->nir_func;
6293 vtn_assert(entry_point);
6294
6295 /* post process entry_points with input params */
6296 if (entry_point->num_params && b->shader->info.stage == MESA_SHADER_KERNEL)
6297 entry_point = vtn_emit_kernel_entry_point_wrapper(b, entry_point);
6298
6299 entry_point->is_entrypoint = true;
6300 }
6301
6302 /* structurize the CFG */
6303 nir_lower_goto_ifs(b->shader);
6304
6305    /* A SPIR-V module can have multiple shader stages and also multiple
6306 * shaders of the same stage. Global variables are declared per-module.
6307 *
6308 * Starting in SPIR-V 1.4 the list of global variables is part of
6309 * OpEntryPoint, so only valid ones will be created. Previous versions
6310 * only have Input and Output variables listed, so remove dead variables to
6311 * clean up the remaining ones.
6312 */
6313 if (!options->create_library && b->version < 0x10400) {
6314 const nir_remove_dead_variables_options dead_opts = {
6315 .can_remove_var = can_remove,
6316 .can_remove_var_data = b->vars_used_indirectly,
6317 };
6318 nir_remove_dead_variables(b->shader, ~(nir_var_function_temp |
6319 nir_var_shader_out |
6320 nir_var_shader_in |
6321 nir_var_system_value),
6322 b->vars_used_indirectly ? &dead_opts : NULL);
6323 }
6324
6325 nir_foreach_variable_in_shader(var, b->shader) {
6326 switch (var->data.mode) {
6327 case nir_var_mem_ubo:
6328 b->shader->info.num_ubos++;
6329 break;
6330 case nir_var_mem_ssbo:
6331 b->shader->info.num_ssbos++;
6332 break;
6333 case nir_var_mem_push_const:
6334 vtn_assert(b->shader->num_uniforms == 0);
6335 b->shader->num_uniforms =
6336 glsl_get_explicit_size(glsl_without_array(var->type), false);
6337 break;
6338 }
6339 }
6340
6341 /* We sometimes generate bogus derefs that, while never used, give the
6342 * validator a bit of heartburn. Run dead code to get rid of them.
6343 */
6344 nir_opt_dce(b->shader);
6345
6346 /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
6347     * a Block, all of them will be, and Blocks are explicitly laid out.
6348 */
6349 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
6350 if (glsl_type_is_interface(var->type)) {
6351 assert(b->options->caps.workgroup_memory_explicit_layout);
6352 b->shader->info.shared_memory_explicit_layout = true;
6353 break;
6354 }
6355 }
6356 if (b->shader->info.shared_memory_explicit_layout) {
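      /* With explicit layout, Workgroup Blocks may alias the same memory, so
       * the shared size is the size of the largest Block rather than the sum.
       */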
6357 unsigned size = 0;
6358 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
6359 assert(glsl_type_is_interface(var->type));
6360 const bool align_to_stride = false;
6361 size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
6362 }
6363 b->shader->info.shared_size = size;
6364 }
6365
6366 /* Unparent the shader from the vtn_builder before we delete the builder */
6367 ralloc_steal(NULL, b->shader);
6368
6369 nir_shader *shader = b->shader;
6370 ralloc_free(b);
6371
6372 return shader;
6373 }
6374