1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Faith Ekstrand (faith@gfxstrand.net)
25  *
26  */
27 
28 #include "glsl_types.h"
29 #include "vtn_private.h"
30 #include "nir/nir_vla.h"
31 #include "nir/nir_control_flow.h"
32 #include "nir/nir_constant_expressions.h"
33 #include "nir/nir_deref.h"
34 #include "spirv_info.h"
35 
36 #include "util/format/u_format.h"
37 #include "util/u_math.h"
38 #include "util/u_string.h"
39 #include "util/u_debug.h"
40 
41 #include <stdio.h>
42 
43 #ifndef NDEBUG
44 uint32_t mesa_spirv_debug = 0;
45 
46 static const struct debug_named_value mesa_spirv_debug_control[] = {
47    { "structured", MESA_SPIRV_DEBUG_STRUCTURED,
48      "Print information of the SPIR-V structured control flow parsing" },
49    DEBUG_NAMED_VALUE_END,
50 };
51 
52 DEBUG_GET_ONCE_FLAGS_OPTION(mesa_spirv_debug, "MESA_SPIRV_DEBUG", mesa_spirv_debug_control, 0)
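/* Usage sketch (an assumption based on the flag table above, not quoted from
 * any docs): running a debug build with MESA_SPIRV_DEBUG=structured in the
 * environment sets the MESA_SPIRV_DEBUG_STRUCTURED bit in mesa_spirv_debug,
 * enabling the structured-control-flow messages.
 */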
53 
54 static enum nir_spirv_debug_level
55 vtn_default_log_level(void)
56 {
57    enum nir_spirv_debug_level level = NIR_SPIRV_DEBUG_LEVEL_WARNING;
58    const char *vtn_log_level_strings[] = {
59       [NIR_SPIRV_DEBUG_LEVEL_WARNING] = "warning",
60       [NIR_SPIRV_DEBUG_LEVEL_INFO]  = "info",
61       [NIR_SPIRV_DEBUG_LEVEL_ERROR] = "error",
62    };
63    const char *str = getenv("MESA_SPIRV_LOG_LEVEL");
64 
65    if (str == NULL)
66       return NIR_SPIRV_DEBUG_LEVEL_WARNING;
67 
68    for (int i = 0; i < ARRAY_SIZE(vtn_log_level_strings); i++) {
69       if (strcasecmp(str, vtn_log_level_strings[i]) == 0) {
70          level = i;
71          break;
72       }
73    }
74 
75    return level;
76 }
77 #endif
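/* Usage sketch for the log level above (debug builds only): setting
 * MESA_SPIRV_LOG_LEVEL=error suppresses warning- and info-level messages in
 * vtn_log() below, while MESA_SPIRV_LOG_LEVEL=info prints everything; an
 * unset or unrecognized value keeps the default warning level.
 */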
78 
79 void
80 vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level,
81         size_t spirv_offset, const char *message)
82 {
83    if (b->options->debug.func) {
84       b->options->debug.func(b->options->debug.private_data,
85                              level, spirv_offset, message);
86    }
87 
88 #ifndef NDEBUG
89    static enum nir_spirv_debug_level default_level =
90       NIR_SPIRV_DEBUG_LEVEL_INVALID;
91 
92    if (default_level == NIR_SPIRV_DEBUG_LEVEL_INVALID)
93       default_level = vtn_default_log_level();
94 
95    if (level >= default_level)
96       fprintf(stderr, "%s\n", message);
97 #endif
98 }
99 
100 void
101 vtn_logf(struct vtn_builder *b, enum nir_spirv_debug_level level,
102          size_t spirv_offset, const char *fmt, ...)
103 {
104    va_list args;
105    char *msg;
106 
107    va_start(args, fmt);
108    msg = ralloc_vasprintf(NULL, fmt, args);
109    va_end(args);
110 
111    vtn_log(b, level, spirv_offset, msg);
112 
113    ralloc_free(msg);
114 }
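/* Illustrative call (a hypothetical message, not from this file):
 *
 *    vtn_logf(b, NIR_SPIRV_DEBUG_LEVEL_INFO, b->spirv_offset,
 *             "handling SPIR-V id %%%u", w[1]);
 *
 * The "%%" renders a literal '%' so ids print in the usual %123 form.
 */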
115 
116 static void
117 vtn_log_err(struct vtn_builder *b,
118             enum nir_spirv_debug_level level, const char *prefix,
119             const char *file, unsigned line,
120             const char *fmt, va_list args)
121 {
122    char *msg;
123 
124    msg = ralloc_strdup(NULL, prefix);
125 
126 #ifndef NDEBUG
127    ralloc_asprintf_append(&msg, "    In file %s:%u\n", file, line);
128 #endif
129 
130    ralloc_asprintf_append(&msg, "    ");
131 
132    ralloc_vasprintf_append(&msg, fmt, args);
133 
134    ralloc_asprintf_append(&msg, "\n    %zu bytes into the SPIR-V binary",
135                           b->spirv_offset);
136 
137    if (b->file) {
138       ralloc_asprintf_append(&msg,
139                              "\n    in SPIR-V source file %s, line %d, col %d",
140                              b->file, b->line, b->col);
141    }
142 
143    vtn_log(b, level, b->spirv_offset, msg);
144 
145    ralloc_free(msg);
146 }
147 
148 static void
149 vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
150 {
151    static int idx = 0;
152 
153    char filename[1024];
154    int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv",
155                       path, prefix, idx++);
156    if (len < 0 || len >= sizeof(filename))
157       return;
158 
159    FILE *f = fopen(filename, "wb");
160    if (f == NULL)
161       return;
162 
163    fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f);
164    fclose(f);
165 
166    vtn_info("SPIR-V shader dumped to %s", filename);
167 }
168 
169 void
170 _vtn_warn(struct vtn_builder *b, const char *file, unsigned line,
171           const char *fmt, ...)
172 {
173    va_list args;
174 
175    va_start(args, fmt);
176    vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_WARNING, "SPIR-V WARNING:\n",
177                file, line, fmt, args);
178    va_end(args);
179 }
180 
181 void
182 _vtn_err(struct vtn_builder *b, const char *file, unsigned line,
183           const char *fmt, ...)
184 {
185    va_list args;
186 
187    va_start(args, fmt);
188    vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V ERROR:\n",
189                file, line, fmt, args);
190    va_end(args);
191 }
192 
193 void
194 _vtn_fail(struct vtn_builder *b, const char *file, unsigned line,
195           const char *fmt, ...)
196 {
197    va_list args;
198 
199    va_start(args, fmt);
200    vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V parsing FAILED:\n",
201                file, line, fmt, args);
202    va_end(args);
203 
204    const char *dump_path = secure_getenv("MESA_SPIRV_FAIL_DUMP_PATH");
205    if (dump_path)
206       vtn_dump_shader(b, dump_path, "fail");
207 
208 #ifndef NDEBUG
209    if (!b->options->skip_os_break_in_debug_build)
210       os_break();
211 #endif
212 
213    vtn_longjmp(b->fail_jump, 1);
214 }
215 
216 const char *
217 vtn_value_type_to_string(enum vtn_value_type t)
218 {
219 #define CASE(typ) case vtn_value_type_##typ: return #typ
220    switch (t) {
221    CASE(invalid);
222    CASE(undef);
223    CASE(string);
224    CASE(decoration_group);
225    CASE(type);
226    CASE(constant);
227    CASE(pointer);
228    CASE(function);
229    CASE(block);
230    CASE(ssa);
231    CASE(extension);
232    CASE(image_pointer);
233    }
234 #undef CASE
235    unreachable("unknown value type");
236    return "UNKNOWN";
237 }
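/* For reference, one expansion of the CASE() macro above:
 *
 *    CASE(undef);  =>  case vtn_value_type_undef: return "undef";
 */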
238 
239 void
240 _vtn_fail_value_type_mismatch(struct vtn_builder *b, uint32_t value_id,
241                               enum vtn_value_type value_type)
242 {
243    struct vtn_value *val = vtn_untyped_value(b, value_id);
244    vtn_fail(
245       "SPIR-V id %u is the wrong kind of value: "
246       "expected '%s' but got '%s'",
247       vtn_id_for_value(b, val),
248       vtn_value_type_to_string(value_type),
249       vtn_value_type_to_string(val->value_type));
250 }
251 
252 void _vtn_fail_value_not_pointer(struct vtn_builder *b,
253                                  uint32_t value_id)
254 {
255    struct vtn_value *val = vtn_untyped_value(b, value_id);
256    vtn_fail("SPIR-V id %u is the wrong kind of value: "
257             "expected 'pointer' OR null constant but got "
258             "'%s' (%s)", value_id,
259             vtn_value_type_to_string(val->value_type),
260             val->is_null_constant ? "null constant" : "not null constant");
261 }
262 
263 static struct vtn_ssa_value *
264 vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
265 {
266    struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
267    val->type = glsl_get_bare_type(type);
268 
269    if (glsl_type_is_cmat(type)) {
270       nir_deref_instr *mat = vtn_create_cmat_temporary(b, type, "cmat_undef");
271       vtn_set_ssa_value_var(b, val, mat->var);
272    } else if (glsl_type_is_vector_or_scalar(type)) {
273       unsigned num_components = glsl_get_vector_elements(val->type);
274       unsigned bit_size = glsl_get_bit_size(val->type);
275       val->def = nir_undef(&b->nb, num_components, bit_size);
276    } else {
277       unsigned elems = glsl_get_length(val->type);
278       val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
279       if (glsl_type_is_array_or_matrix(type)) {
280          const struct glsl_type *elem_type = glsl_get_array_element(type);
281          for (unsigned i = 0; i < elems; i++)
282             val->elems[i] = vtn_undef_ssa_value(b, elem_type);
283       } else {
284          vtn_assert(glsl_type_is_struct_or_ifc(type));
285          for (unsigned i = 0; i < elems; i++) {
286             const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
287             val->elems[i] = vtn_undef_ssa_value(b, elem_type);
288          }
289       }
290    }
291 
292    return val;
293 }
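/* Illustration (hypothetical type): for a struct { vec4 a; float b[2]; },
 * this allocates two elems; elems[0] gets a single 4-component nir_undef,
 * and elems[1] recurses into the array so each float gets its own scalar
 * nir_undef.
 */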
294 
295 struct vtn_ssa_value *
296 vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
297                     const struct glsl_type *type)
298 {
299    struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
300    val->type = glsl_get_bare_type(type);
301 
302    if (glsl_type_is_cmat(type)) {
303       const struct glsl_type *element_type = glsl_get_cmat_element(type);
304 
305       nir_deref_instr *mat = vtn_create_cmat_temporary(b, type, "cmat_constant");
306       nir_cmat_construct(&b->nb, &mat->def,
307                          nir_build_imm(&b->nb, 1, glsl_get_bit_size(element_type),
308                                        constant->values));
309       vtn_set_ssa_value_var(b, val, mat->var);
310    } else if (glsl_type_is_vector_or_scalar(type)) {
311       val->def = nir_build_imm(&b->nb, glsl_get_vector_elements(val->type),
312                                glsl_get_bit_size(val->type),
313                                constant->values);
314    } else {
315       unsigned elems = glsl_get_length(val->type);
316       val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
317       if (glsl_type_is_array_or_matrix(type)) {
318          const struct glsl_type *elem_type = glsl_get_array_element(type);
319          for (unsigned i = 0; i < elems; i++) {
320             val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
321                                                 elem_type);
322          }
323       } else {
324          vtn_assert(glsl_type_is_struct_or_ifc(type));
325          for (unsigned i = 0; i < elems; i++) {
326             const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
327             val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
328                                                 elem_type);
329          }
330       }
331    }
332 
333    return val;
334 }
335 
336 struct vtn_ssa_value *
337 vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
338 {
339    struct vtn_value *val = vtn_untyped_value(b, value_id);
340    switch (val->value_type) {
341    case vtn_value_type_undef:
342       return vtn_undef_ssa_value(b, val->type->type);
343 
344    case vtn_value_type_constant:
345       return vtn_const_ssa_value(b, val->constant, val->type->type);
346 
347    case vtn_value_type_ssa:
348       return val->ssa;
349 
350    case vtn_value_type_pointer:
351       vtn_assert(val->pointer->ptr_type && val->pointer->ptr_type->type);
352       struct vtn_ssa_value *ssa =
353          vtn_create_ssa_value(b, val->pointer->ptr_type->type);
354       ssa->def = vtn_pointer_to_ssa(b, val->pointer);
355       return ssa;
356 
357    default:
358       vtn_fail("Invalid type for an SSA value");
359    }
360 }
361 
362 struct vtn_value *
363 vtn_push_ssa_value(struct vtn_builder *b, uint32_t value_id,
364                    struct vtn_ssa_value *ssa)
365 {
366    struct vtn_type *type = vtn_get_value_type(b, value_id);
367 
368    /* See vtn_create_ssa_value */
369    vtn_fail_if(ssa->type != glsl_get_bare_type(type->type),
370                "Type mismatch for SPIR-V value %%%u", value_id);
371 
372    struct vtn_value *val;
373    if (type->base_type == vtn_base_type_pointer) {
374       val = vtn_push_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type));
375    } else {
376       /* Don't trip the value_type_ssa check in vtn_push_value */
377       val = vtn_push_value(b, value_id, vtn_value_type_invalid);
378       val->value_type = vtn_value_type_ssa;
379       val->ssa = ssa;
380    }
381 
382    return val;
383 }
384 
385 nir_def *
386 vtn_get_nir_ssa(struct vtn_builder *b, uint32_t value_id)
387 {
388    struct vtn_ssa_value *ssa = vtn_ssa_value(b, value_id);
389    vtn_fail_if(!glsl_type_is_vector_or_scalar(ssa->type),
390                "Expected a vector or scalar type");
391    return ssa->def;
392 }
393 
394 struct vtn_value *
395 vtn_push_nir_ssa(struct vtn_builder *b, uint32_t value_id, nir_def *def)
396 {
397    /* Types for all SPIR-V SSA values are set as part of a pre-pass so the
398     * type will be valid by the time we get here.
399     */
400    struct vtn_type *type = vtn_get_value_type(b, value_id);
401    vtn_fail_if(def->num_components != glsl_get_vector_elements(type->type) ||
402                def->bit_size != glsl_get_bit_size(type->type),
403                "Mismatch between NIR and SPIR-V type.");
404    struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
405    ssa->def = def;
406    return vtn_push_ssa_value(b, value_id, ssa);
407 }
408 
409 nir_deref_instr *
410 vtn_get_deref_for_id(struct vtn_builder *b, uint32_t value_id)
411 {
412    return vtn_get_deref_for_ssa_value(b, vtn_ssa_value(b, value_id));
413 }
414 
415 nir_deref_instr *
416 vtn_get_deref_for_ssa_value(struct vtn_builder *b, struct vtn_ssa_value *ssa)
417 {
418    vtn_fail_if(!ssa->is_variable, "Expected an SSA value with a nir_variable");
419    return nir_build_deref_var(&b->nb, ssa->var);
420 }
421 
422 struct vtn_value *
423 vtn_push_var_ssa(struct vtn_builder *b, uint32_t value_id, nir_variable *var)
424 {
425    struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, var->type);
426    vtn_set_ssa_value_var(b, ssa, var);
427    return vtn_push_ssa_value(b, value_id, ssa);
428 }
429 
430 static enum gl_access_qualifier
431 spirv_to_gl_access_qualifier(struct vtn_builder *b,
432                              SpvAccessQualifier access_qualifier)
433 {
434    switch (access_qualifier) {
435    case SpvAccessQualifierReadOnly:
436       return ACCESS_NON_WRITEABLE;
437    case SpvAccessQualifierWriteOnly:
438       return ACCESS_NON_READABLE;
439    case SpvAccessQualifierReadWrite:
440       return 0;
441    default:
442       vtn_fail("Invalid image access qualifier");
443    }
444 }
445 
446 static nir_deref_instr *
447 vtn_get_image(struct vtn_builder *b, uint32_t value_id,
448               enum gl_access_qualifier *access)
449 {
450    struct vtn_type *type = vtn_get_value_type(b, value_id);
451    vtn_assert(type->base_type == vtn_base_type_image);
452    if (access)
453       *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier);
454    nir_variable_mode mode = glsl_type_is_image(type->glsl_image) ?
455                             nir_var_image : nir_var_uniform;
456    return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
457                                mode, type->glsl_image, 0);
458 }
459 
460 static void
461 vtn_push_image(struct vtn_builder *b, uint32_t value_id,
462                nir_deref_instr *deref, bool propagate_non_uniform)
463 {
464    struct vtn_type *type = vtn_get_value_type(b, value_id);
465    vtn_assert(type->base_type == vtn_base_type_image);
466    struct vtn_value *value = vtn_push_nir_ssa(b, value_id, &deref->def);
467    value->propagated_non_uniform = propagate_non_uniform;
468 }
469 
470 static nir_deref_instr *
471 vtn_get_sampler(struct vtn_builder *b, uint32_t value_id)
472 {
473    struct vtn_type *type = vtn_get_value_type(b, value_id);
474    vtn_assert(type->base_type == vtn_base_type_sampler);
475    return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
476                                nir_var_uniform, glsl_bare_sampler_type(), 0);
477 }
478 
479 nir_def *
480 vtn_sampled_image_to_nir_ssa(struct vtn_builder *b,
481                              struct vtn_sampled_image si)
482 {
483    return nir_vec2(&b->nb, &si.image->def, &si.sampler->def);
484 }
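/* The vec2 built above is a pair of pointer-sized handles: channel 0 holds
 * the image deref and channel 1 the sampler deref, which is exactly how
 * vtn_get_sampled_image() below unpacks it with nir_channel().
 */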
485 
486 static void
487 vtn_push_sampled_image(struct vtn_builder *b, uint32_t value_id,
488                        struct vtn_sampled_image si, bool propagate_non_uniform)
489 {
490    struct vtn_type *type = vtn_get_value_type(b, value_id);
491    vtn_assert(type->base_type == vtn_base_type_sampled_image);
492    struct vtn_value *value = vtn_push_nir_ssa(b, value_id,
493                                               vtn_sampled_image_to_nir_ssa(b, si));
494    value->propagated_non_uniform = propagate_non_uniform;
495 }
496 
497 static struct vtn_sampled_image
498 vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id)
499 {
500    struct vtn_type *type = vtn_get_value_type(b, value_id);
501    vtn_assert(type->base_type == vtn_base_type_sampled_image);
502    nir_def *si_vec2 = vtn_get_nir_ssa(b, value_id);
503 
504    /* Even though this is a sampled image, we can end up here with a storage
505     * image because OpenCL doesn't distinguish between the two.
506     */
507    const struct glsl_type *image_type = type->image->glsl_image;
508    nir_variable_mode image_mode = glsl_type_is_image(image_type) ?
509                                   nir_var_image : nir_var_uniform;
510 
511    struct vtn_sampled_image si = { NULL, };
512    si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0),
513                                    image_mode, image_type, 0);
514    si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1),
515                                      nir_var_uniform,
516                                      glsl_bare_sampler_type(), 0);
517    return si;
518 }
519 
520 const char *
521 vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
522                    unsigned word_count, unsigned *words_used)
523 {
524    /* From the SPIR-V spec:
525     *
526     *    "A string is interpreted as a nul-terminated stream of characters.
527     *    The character set is Unicode in the UTF-8 encoding scheme. The UTF-8
528     *    octets (8-bit bytes) are packed four per word, following the
529     *    little-endian convention (i.e., the first octet is in the
530     *    lowest-order 8 bits of the word). The final word contains the
531     *    string’s nul-termination character (0), and all contents past the
532     *    end of the string in the final word are padded with 0."
533     *
534     * On big-endian, we need to byte-swap.
535     */
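   /* Worked example (ours, not from the spec): on little-endian the string
    * "abc" packs into one word, 0x00636261 ('a' = 0x61 in the low byte, then
    * 'b', 'c', and the nul terminator in the high byte). "abcd" needs two
    * words, 0x64636261 followed by 0x00000000 for the terminator, so
    * words_used comes out as 2.
    */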
536 #if UTIL_ARCH_BIG_ENDIAN
537    {
538       uint32_t *copy = vtn_alloc_array(b, uint32_t, word_count);
539       for (unsigned i = 0; i < word_count; i++)
540          copy[i] = util_bswap32(words[i]);
541       words = copy;
542    }
543 #endif
544 
545    const char *str = (const char *)words;
546    const char *end = memchr(str, 0, word_count * 4);
547    vtn_fail_if(end == NULL, "String is not null-terminated");
548 
549    if (words_used)
550       *words_used = DIV_ROUND_UP(end - str + 1, sizeof(*words));
551 
552    return str;
553 }
554 
555 const uint32_t *
556 vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
557                         const uint32_t *end, vtn_instruction_handler handler)
558 {
559    b->file = NULL;
560    b->line = -1;
561    b->col = -1;
562 
563    const uint32_t *w = start;
564    while (w < end) {
565       SpvOp opcode = w[0] & SpvOpCodeMask;
566       unsigned count = w[0] >> SpvWordCountShift;
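      /* Worked example: if w[0] == 0x00040015, the low 16 bits give opcode
       * 0x15 (OpTypeInt) and the high 16 bits give a word count of 4, i.e.
       * an instruction like "%u32 = OpTypeInt 32 0".
       */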
567       vtn_assert(count >= 1 && w + count <= end);
568 
569       b->spirv_offset = (uint8_t *)w - (uint8_t *)b->spirv;
570 
571       switch (opcode) {
572       case SpvOpNop:
573          break; /* Do nothing */
574 
575       case SpvOpLine:
576          b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
577          b->line = w[2];
578          b->col = w[3];
579          break;
580 
581       case SpvOpNoLine:
582          b->file = NULL;
583          b->line = -1;
584          b->col = -1;
585          break;
586 
587       default:
588          if (!handler(b, opcode, w, count))
589             return w;
590          break;
591       }
592 
593       w += count;
594    }
595 
596    b->spirv_offset = 0;
597    b->file = NULL;
598    b->line = -1;
599    b->col = -1;
600 
601    assert(w == end);
602    return w;
603 }
604 
605 static bool
606 vtn_handle_non_semantic_instruction(struct vtn_builder *b, SpvOp ext_opcode,
607                                     const uint32_t *w, unsigned count)
608 {
609    /* Do nothing. */
610    return true;
611 }
612 
613 static void
614 vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
615                      const uint32_t *w, unsigned count)
616 {
617    switch (opcode) {
618    case SpvOpExtInstImport: {
619       struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
620       const char *ext = vtn_string_literal(b, &w[2], count - 2, NULL);
621       if (strcmp(ext, "GLSL.std.450") == 0) {
622          val->ext_handler = vtn_handle_glsl450_instruction;
623       } else if ((strcmp(ext, "SPV_AMD_gcn_shader") == 0)
624                 && (b->options && b->options->caps.amd_gcn_shader)) {
625          val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
626       } else if ((strcmp(ext, "SPV_AMD_shader_ballot") == 0)
627                 && (b->options && b->options->caps.amd_shader_ballot)) {
628          val->ext_handler = vtn_handle_amd_shader_ballot_instruction;
629       } else if ((strcmp(ext, "SPV_AMD_shader_trinary_minmax") == 0)
630                 && (b->options && b->options->caps.amd_trinary_minmax)) {
631          val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
632       } else if ((strcmp(ext, "SPV_AMD_shader_explicit_vertex_parameter") == 0)
633                 && (b->options && b->options->caps.amd_shader_explicit_vertex_parameter)) {
634          val->ext_handler = vtn_handle_amd_shader_explicit_vertex_parameter_instruction;
635       } else if (strcmp(ext, "OpenCL.std") == 0) {
636          val->ext_handler = vtn_handle_opencl_instruction;
637       } else if (strstr(ext, "NonSemantic.") == ext) {
638          val->ext_handler = vtn_handle_non_semantic_instruction;
639       } else {
640          vtn_fail("Unsupported extension: %s", ext);
641       }
642       break;
643    }
644 
645    case SpvOpExtInst: {
646       struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
647       bool handled = val->ext_handler(b, w[4], w, count);
648       vtn_assert(handled);
649       break;
650    }
651 
652    default:
653       vtn_fail_with_opcode("Unhandled opcode", opcode);
654    }
655 }
656 
657 static void
658 _foreach_decoration_helper(struct vtn_builder *b,
659                            struct vtn_value *base_value,
660                            int parent_member,
661                            struct vtn_value *value,
662                            vtn_decoration_foreach_cb cb, void *data)
663 {
664    for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
665       int member;
666       if (dec->scope == VTN_DEC_DECORATION) {
667          member = parent_member;
668       } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
669          vtn_fail_if(value->value_type != vtn_value_type_type ||
670                      value->type->base_type != vtn_base_type_struct,
671                      "OpMemberDecorate and OpGroupMemberDecorate are only "
672                      "allowed on OpTypeStruct");
673          /* This means we haven't recursed yet */
674          assert(value == base_value);
675 
676          member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
677 
678          vtn_fail_if(member >= base_value->type->length,
679                      "OpMemberDecorate specifies member %d but the "
680                      "OpTypeStruct has only %u members",
681                      member, base_value->type->length);
682       } else {
683          /* Not a decoration */
684          assert(dec->scope == VTN_DEC_EXECUTION_MODE ||
685                 dec->scope <= VTN_DEC_STRUCT_MEMBER_NAME0);
686          continue;
687       }
688 
689       if (dec->group) {
690          assert(dec->group->value_type == vtn_value_type_decoration_group);
691          _foreach_decoration_helper(b, base_value, member, dec->group,
692                                     cb, data);
693       } else {
694          cb(b, base_value, member, dec, data);
695       }
696    }
697 }
698 
699 /** Iterates (recursively if needed) over all of the decorations on a value
700  *
701  * This function iterates over all of the decorations applied to a given
702  * value.  If it encounters a decoration group, it recurses into the group
703  * and iterates over all of those decorations as well.
704  */
705 void
706 vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
707                        vtn_decoration_foreach_cb cb, void *data)
708 {
709    _foreach_decoration_helper(b, value, -1, value, cb, data);
710 }
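/* Illustration (hypothetical SPIR-V assembly): given
 *
 *    %grp = OpDecorationGroup
 *           OpDecorate %grp Coherent
 *           OpGroupDecorate %grp %a %b
 *
 * iterating the decorations of %a (or %b) recurses through %grp, so the
 * callback sees Coherent as if it had been applied directly.
 */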
711 
712 void
713 vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
714                            vtn_execution_mode_foreach_cb cb, void *data)
715 {
716    for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
717       if (dec->scope != VTN_DEC_EXECUTION_MODE)
718          continue;
719 
720       assert(dec->group == NULL);
721       cb(b, value, dec, data);
722    }
723 }
724 
725 void
726 vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
727                       const uint32_t *w, unsigned count)
728 {
729    const uint32_t *w_end = w + count;
730    const uint32_t target = w[1];
731    w += 2;
732 
733    switch (opcode) {
734    case SpvOpDecorationGroup:
735       vtn_push_value(b, target, vtn_value_type_decoration_group);
736       break;
737 
738    case SpvOpDecorate:
739    case SpvOpDecorateId:
740    case SpvOpMemberDecorate:
741    case SpvOpDecorateString:
742    case SpvOpMemberDecorateString:
743    case SpvOpExecutionMode:
744    case SpvOpExecutionModeId: {
745       struct vtn_value *val = vtn_untyped_value(b, target);
746 
747       struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
748       switch (opcode) {
749       case SpvOpDecorate:
750       case SpvOpDecorateId:
751       case SpvOpDecorateString:
752          dec->scope = VTN_DEC_DECORATION;
753          break;
754       case SpvOpMemberDecorate:
755       case SpvOpMemberDecorateString:
756          dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
757          vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */
758                      "Member argument of OpMemberDecorate too large");
759          break;
760       case SpvOpExecutionMode:
761       case SpvOpExecutionModeId:
762          dec->scope = VTN_DEC_EXECUTION_MODE;
763          break;
764       default:
765          unreachable("Invalid decoration opcode");
766       }
767       dec->decoration = *(w++);
768       dec->num_operands = w_end - w;
769       dec->operands = w;
770 
771       /* Link into the list */
772       dec->next = val->decoration;
773       val->decoration = dec;
774       break;
775    }
776 
777    case SpvOpMemberName: {
778       struct vtn_value *val = vtn_untyped_value(b, target);
779       struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
780 
781       dec->scope = VTN_DEC_STRUCT_MEMBER_NAME0 - *(w++);
782 
783       dec->member_name = vtn_string_literal(b, w, w_end - w, NULL);
784 
785       dec->next = val->decoration;
786       val->decoration = dec;
787       break;
788    }
789 
790    case SpvOpGroupMemberDecorate:
791    case SpvOpGroupDecorate: {
792       struct vtn_value *group =
793          vtn_value(b, target, vtn_value_type_decoration_group);
794 
795       for (; w < w_end; w++) {
796          struct vtn_value *val = vtn_untyped_value(b, *w);
797          struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
798 
799          dec->group = group;
800          if (opcode == SpvOpGroupDecorate) {
801             dec->scope = VTN_DEC_DECORATION;
802          } else {
803             dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
804             vtn_fail_if(dec->scope < 0, /* Check for overflow */
805                         "Member argument of OpGroupMemberDecorate too large");
806          }
807 
808          /* Link into the list */
809          dec->next = val->decoration;
810          val->decoration = dec;
811       }
812       break;
813    }
814 
815    default:
816       unreachable("Unhandled opcode");
817    }
818 }
819 
820 struct member_decoration_ctx {
821    unsigned num_fields;
822    struct glsl_struct_field *fields;
823    struct vtn_type *type;
824 };
825 
826 /**
827  * Returns true if the given type contains a struct decorated Block or
828  * BufferBlock
829  */
830 bool
831 vtn_type_contains_block(struct vtn_builder *b, struct vtn_type *type)
832 {
833    switch (type->base_type) {
834    case vtn_base_type_array:
835       return vtn_type_contains_block(b, type->array_element);
836    case vtn_base_type_struct:
837       if (type->block || type->buffer_block)
838          return true;
839       for (unsigned i = 0; i < type->length; i++) {
840          if (vtn_type_contains_block(b, type->members[i]))
841             return true;
842       }
843       return false;
844    default:
845       return false;
846    }
847 }
848 
849 /** Returns true if two types are "compatible", i.e. you can do an OpLoad,
850  * OpStore, or OpCopyMemory between them without breaking anything.
851  * Technically, the SPIR-V rules require the exact same type ID but this lets
852  * us internally be a bit looser.
853  */
854 bool
855 vtn_types_compatible(struct vtn_builder *b,
856                      struct vtn_type *t1, struct vtn_type *t2)
857 {
858    if (t1->id == t2->id)
859       return true;
860 
861    if (t1->base_type != t2->base_type)
862       return false;
863 
864    switch (t1->base_type) {
865    case vtn_base_type_void:
866    case vtn_base_type_scalar:
867    case vtn_base_type_vector:
868    case vtn_base_type_matrix:
869    case vtn_base_type_image:
870    case vtn_base_type_sampler:
871    case vtn_base_type_sampled_image:
872    case vtn_base_type_event:
873    case vtn_base_type_cooperative_matrix:
874       return t1->type == t2->type;
875 
876    case vtn_base_type_array:
877       return t1->length == t2->length &&
878              vtn_types_compatible(b, t1->array_element, t2->array_element);
879 
880    case vtn_base_type_pointer:
881       return vtn_types_compatible(b, t1->deref, t2->deref);
882 
883    case vtn_base_type_struct:
884       if (t1->length != t2->length)
885          return false;
886 
887       for (unsigned i = 0; i < t1->length; i++) {
888          if (!vtn_types_compatible(b, t1->members[i], t2->members[i]))
889             return false;
890       }
891       return true;
892 
893    case vtn_base_type_accel_struct:
894    case vtn_base_type_ray_query:
895       return true;
896 
897    case vtn_base_type_function:
898       /* This case shouldn't get hit since you can't copy around function
899        * types.  Just require them to be identical.
900        */
901       return false;
902    }
903 
904    vtn_fail("Invalid base type");
905 }
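/* For example, two structurally identical OpTypeStruct declarations with
 * distinct result ids (legal SPIR-V when their decorations differ) compare
 * compatible here: the id check fails but the member-by-member walk
 * succeeds, so an OpCopyMemory between them is accepted.
 */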
906 
907 struct vtn_type *
908 vtn_type_without_array(struct vtn_type *type)
909 {
910    while (type->base_type == vtn_base_type_array)
911       type = type->array_element;
912    return type;
913 }
914 
915 /* does a shallow copy of a vtn_type */
916 
917 static struct vtn_type *
918 vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
919 {
920    struct vtn_type *dest = vtn_alloc(b, struct vtn_type);
921    *dest = *src;
922 
923    switch (src->base_type) {
924    case vtn_base_type_void:
925    case vtn_base_type_scalar:
926    case vtn_base_type_vector:
927    case vtn_base_type_matrix:
928    case vtn_base_type_array:
929    case vtn_base_type_pointer:
930    case vtn_base_type_image:
931    case vtn_base_type_sampler:
932    case vtn_base_type_sampled_image:
933    case vtn_base_type_event:
934    case vtn_base_type_accel_struct:
935    case vtn_base_type_ray_query:
936    case vtn_base_type_cooperative_matrix:
937       /* Nothing more to do */
938       break;
939 
940    case vtn_base_type_struct:
941       dest->members = vtn_alloc_array(b, struct vtn_type *, src->length);
942       memcpy(dest->members, src->members,
943              src->length * sizeof(src->members[0]));
944 
945       dest->offsets = vtn_alloc_array(b, unsigned, src->length);
946       memcpy(dest->offsets, src->offsets,
947              src->length * sizeof(src->offsets[0]));
948       break;
949 
950    case vtn_base_type_function:
951       dest->params = vtn_alloc_array(b, struct vtn_type *, src->length);
952       memcpy(dest->params, src->params, src->length * sizeof(src->params[0]));
953       break;
954    }
955 
956    return dest;
957 }
958 
959 static bool
960 vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type,
961                                enum vtn_variable_mode mode)
962 {
963    /* For OpenCL we never want to strip the info from the types, and it makes
964     * type comparisons easier in later stages.
965     */
966    if (b->options->environment == NIR_SPIRV_OPENCL)
967       return true;
968 
969    switch (mode) {
970    case vtn_variable_mode_input:
971    case vtn_variable_mode_output:
 972       /* Layout decorations are kept because we need offsets for XFB arrays
 973        * of blocks.
 974        */
975       return b->shader->info.has_transform_feedback_varyings;
976 
977    case vtn_variable_mode_ssbo:
978    case vtn_variable_mode_phys_ssbo:
979    case vtn_variable_mode_ubo:
980    case vtn_variable_mode_push_constant:
981    case vtn_variable_mode_shader_record:
982       return true;
983 
984    case vtn_variable_mode_workgroup:
985       return b->options->caps.workgroup_memory_explicit_layout;
986 
987    default:
988       return false;
989    }
990 }
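/* Consequence for vtn_type_get_nir_type() below: a UBO or SSBO struct keeps
 * its explicit offsets and strides, while e.g. a function-temporary struct
 * falls through to glsl_get_bare_type() and loses that layout.
 */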
991 
992 const struct glsl_type *
993 vtn_type_get_nir_type(struct vtn_builder *b, struct vtn_type *type,
994                       enum vtn_variable_mode mode)
995 {
996    if (mode == vtn_variable_mode_atomic_counter) {
997       vtn_fail_if(glsl_without_array(type->type) != glsl_uint_type(),
998                   "Variables in the AtomicCounter storage class should be "
999                   "(possibly arrays of arrays of) uint.");
1000       return glsl_type_wrap_in_arrays(glsl_atomic_uint_type(), type->type);
1001    }
1002 
1003    if (mode == vtn_variable_mode_uniform) {
1004       switch (type->base_type) {
1005       case vtn_base_type_array: {
1006          const struct glsl_type *elem_type =
1007             vtn_type_get_nir_type(b, type->array_element, mode);
1008 
1009          return glsl_array_type(elem_type, type->length,
1010                                 glsl_get_explicit_stride(type->type));
1011       }
1012 
1013       case vtn_base_type_struct: {
1014          bool need_new_struct = false;
1015          const uint32_t num_fields = type->length;
1016          NIR_VLA(struct glsl_struct_field, fields, num_fields);
1017          for (unsigned i = 0; i < num_fields; i++) {
1018             fields[i] = *glsl_get_struct_field_data(type->type, i);
1019             const struct glsl_type *field_nir_type =
1020                vtn_type_get_nir_type(b, type->members[i], mode);
1021             if (fields[i].type != field_nir_type) {
1022                fields[i].type = field_nir_type;
1023                need_new_struct = true;
1024             }
1025          }
1026          if (need_new_struct) {
1027             if (glsl_type_is_interface(type->type)) {
1028                return glsl_interface_type(fields, num_fields,
1029                                           /* packing */ 0, false,
1030                                           glsl_get_type_name(type->type));
1031             } else {
1032                return glsl_struct_type(fields, num_fields,
1033                                        glsl_get_type_name(type->type),
1034                                        glsl_struct_type_is_packed(type->type));
1035             }
1036          } else {
1037             /* No changes, just pass it on */
1038             return type->type;
1039          }
1040       }
1041 
1042       case vtn_base_type_image:
1043          vtn_assert(glsl_type_is_texture(type->glsl_image));
1044          return type->glsl_image;
1045 
1046       case vtn_base_type_sampler:
1047          return glsl_bare_sampler_type();
1048 
1049       case vtn_base_type_sampled_image:
1050          return glsl_texture_type_to_sampler(type->image->glsl_image,
1051                                              false /* is_shadow */);
1052 
1053       default:
1054          return type->type;
1055       }
1056    }
1057 
1058    if (mode == vtn_variable_mode_image) {
1059       struct vtn_type *image_type = vtn_type_without_array(type);
1060       vtn_assert(image_type->base_type == vtn_base_type_image);
1061       return glsl_type_wrap_in_arrays(image_type->glsl_image, type->type);
1062    }
1063 
1064    /* Layout decorations are allowed but ignored in certain conditions,
1065     * to allow SPIR-V generators to perform type deduplication.  Discard
1066     * unnecessary ones when passing to NIR.
1067     */
1068    if (!vtn_type_needs_explicit_layout(b, type, mode))
1069       return glsl_get_bare_type(type->type);
1070 
1071    return type->type;
1072 }
1073 
1074 static struct vtn_type *
1075 mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
1076 {
1077    type->members[member] = vtn_type_copy(b, type->members[member]);
1078    type = type->members[member];
1079 
1080    /* We may have an array of matrices.... Oh, joy! */
1081    while (glsl_type_is_array(type->type)) {
1082       type->array_element = vtn_type_copy(b, type->array_element);
1083       type = type->array_element;
1084    }
1085 
1086    vtn_assert(glsl_type_is_matrix(type->type));
1087 
1088    return type;
1089 }
1090 
1091 static void
1092 vtn_handle_access_qualifier(struct vtn_builder *b, struct vtn_type *type,
1093                             int member, enum gl_access_qualifier access)
1094 {
1095    type->members[member] = vtn_type_copy(b, type->members[member]);
1096    type = type->members[member];
1097 
1098    type->access |= access;
1099 }
1100 
1101 static void
1102 array_stride_decoration_cb(struct vtn_builder *b,
1103                            struct vtn_value *val, int member,
1104                            const struct vtn_decoration *dec, void *void_ctx)
1105 {
1106    struct vtn_type *type = val->type;
1107 
1108    if (dec->decoration == SpvDecorationArrayStride) {
1109       if (vtn_type_contains_block(b, type)) {
1110          vtn_warn("The ArrayStride decoration cannot be applied to an array "
1111                   "type which contains a structure type decorated Block "
1112                   "or BufferBlock");
1113          /* Ignore the decoration */
1114       } else {
1115          vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero");
1116          type->stride = dec->operands[0];
1117       }
1118    }
1119 }
1120 
1121 static void
1122 struct_member_decoration_cb(struct vtn_builder *b,
1123                             UNUSED struct vtn_value *val, int member,
1124                             const struct vtn_decoration *dec, void *void_ctx)
1125 {
1126    struct member_decoration_ctx *ctx = void_ctx;
1127 
1128    if (member < 0)
1129       return;
1130 
1131    assert(member < ctx->num_fields);
1132 
1133    switch (dec->decoration) {
1134    case SpvDecorationRelaxedPrecision:
1135    case SpvDecorationUniform:
1136    case SpvDecorationUniformId:
1137       break; /* FIXME: Do nothing with this for now. */
1138    case SpvDecorationNonWritable:
1139       vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_WRITEABLE);
1140       break;
1141    case SpvDecorationNonReadable:
1142       vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_READABLE);
1143       break;
1144    case SpvDecorationVolatile:
1145       vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_VOLATILE);
1146       break;
1147    case SpvDecorationCoherent:
1148       vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_COHERENT);
1149       break;
1150    case SpvDecorationNoPerspective:
1151       ctx->fields[member].interpolation = INTERP_MODE_NOPERSPECTIVE;
1152       break;
1153    case SpvDecorationFlat:
1154       ctx->fields[member].interpolation = INTERP_MODE_FLAT;
1155       break;
1156    case SpvDecorationExplicitInterpAMD:
1157       ctx->fields[member].interpolation = INTERP_MODE_EXPLICIT;
1158       break;
1159    case SpvDecorationCentroid:
1160       ctx->fields[member].centroid = true;
1161       break;
1162    case SpvDecorationSample:
1163       ctx->fields[member].sample = true;
1164       break;
1165    case SpvDecorationStream:
1166       /* This is handled later by var_decoration_cb in vtn_variables.c */
1167       break;
1168    case SpvDecorationLocation:
1169       ctx->fields[member].location = dec->operands[0];
1170       break;
1171    case SpvDecorationComponent:
1172       break; /* FIXME: What should we do with these? */
1173    case SpvDecorationBuiltIn:
1174       ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
1175       ctx->type->members[member]->is_builtin = true;
1176       ctx->type->members[member]->builtin = dec->operands[0];
1177       ctx->type->builtin_block = true;
1178       break;
1179    case SpvDecorationOffset:
1180       ctx->type->offsets[member] = dec->operands[0];
1181       ctx->fields[member].offset = dec->operands[0];
1182       break;
1183    case SpvDecorationMatrixStride:
1184       /* Handled as a second pass */
1185       break;
1186    case SpvDecorationColMajor:
1187       break; /* Nothing to do here.  Column-major is the default. */
1188    case SpvDecorationRowMajor:
1189       mutable_matrix_member(b, ctx->type, member)->row_major = true;
1190       break;
1191 
1192    case SpvDecorationPatch:
1193    case SpvDecorationPerPrimitiveNV:
1194    case SpvDecorationPerTaskNV:
1195    case SpvDecorationPerViewNV:
1196       break;
1197 
1198    case SpvDecorationSpecId:
1199    case SpvDecorationBlock:
1200    case SpvDecorationBufferBlock:
1201    case SpvDecorationArrayStride:
1202    case SpvDecorationGLSLShared:
1203    case SpvDecorationGLSLPacked:
1204    case SpvDecorationAliased:
1205    case SpvDecorationConstant:
1206    case SpvDecorationIndex:
1207    case SpvDecorationBinding:
1208    case SpvDecorationDescriptorSet:
1209    case SpvDecorationLinkageAttributes:
1210    case SpvDecorationNoContraction:
1211    case SpvDecorationInputAttachmentIndex:
1212    case SpvDecorationCPacked:
1213       vtn_warn("Decoration not allowed on struct members: %s",
1214                spirv_decoration_to_string(dec->decoration));
1215       break;
1216 
1217    case SpvDecorationRestrict:
1218       /* While "Restrict" is invalid for struct members, glslang incorrectly
1219        * generates it and it ends up hiding actual driver issues in a wall of
1220        * spam from deqp-vk.  Return it to the above block once the issue is
1221        * resolved.  https://github.com/KhronosGroup/glslang/issues/703
1222        */
1223       break;
1224 
1225    case SpvDecorationInvariant:
1226       /* Also incorrectly generated by glslang, ignore it. */
1227       break;
1228 
1229    case SpvDecorationXfbBuffer:
1230    case SpvDecorationXfbStride:
1231       /* This is handled later by var_decoration_cb in vtn_variables.c */
1232       break;
1233 
1234    case SpvDecorationSaturatedConversion:
1235    case SpvDecorationFuncParamAttr:
1236    case SpvDecorationFPRoundingMode:
1237    case SpvDecorationFPFastMathMode:
1238    case SpvDecorationAlignment:
1239       if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1240          vtn_warn("Decoration only allowed for CL-style kernels: %s",
1241                   spirv_decoration_to_string(dec->decoration));
1242       }
1243       break;
1244 
1245    case SpvDecorationUserSemantic:
1246    case SpvDecorationUserTypeGOOGLE:
1247       /* User semantic decorations can safely be ignored by the driver. */
1248       break;
1249 
1250    default:
1251       vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1252    }
1253 }
1254 
1255 /** Chases the array type all the way down to the tail and rewrites the
1256  * glsl_types to be based off the tail's glsl_type.
1257  */
1258 static void
1259 vtn_array_type_rewrite_glsl_type(struct vtn_type *type)
1260 {
1261    if (type->base_type != vtn_base_type_array)
1262       return;
1263 
1264    vtn_array_type_rewrite_glsl_type(type->array_element);
1265 
1266    type->type = glsl_array_type(type->array_element->type,
1267                                 type->length, type->stride);
1268 }
1269 
1270 /* Matrix strides are handled as a separate pass because we need to know
1271  * whether the matrix is row-major or not first.
1272  */
1273 static void
1274 struct_member_matrix_stride_cb(struct vtn_builder *b,
1275                                UNUSED struct vtn_value *val, int member,
1276                                const struct vtn_decoration *dec,
1277                                void *void_ctx)
1278 {
1279    if (dec->decoration != SpvDecorationMatrixStride)
1280       return;
1281 
1282    vtn_fail_if(member < 0,
1283                "The MatrixStride decoration is only allowed on members "
1284                "of OpTypeStruct");
1285    vtn_fail_if(dec->operands[0] == 0, "MatrixStride must be non-zero");
1286 
1287    struct member_decoration_ctx *ctx = void_ctx;
1288 
1289    struct vtn_type *mat_type = mutable_matrix_member(b, ctx->type, member);
1290    if (mat_type->row_major) {
1291       mat_type->array_element = vtn_type_copy(b, mat_type->array_element);
1292       mat_type->stride = mat_type->array_element->stride;
1293       mat_type->array_element->stride = dec->operands[0];
1294 
1295       mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1296                                                  dec->operands[0], true);
1297       mat_type->array_element->type = glsl_get_column_type(mat_type->type);
1298    } else {
1299       vtn_assert(mat_type->array_element->stride > 0);
1300       mat_type->stride = dec->operands[0];
1301 
1302       mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1303                                                  dec->operands[0], false);
1304    }
1305 
1306    /* Now that we've replaced the glsl_type with a properly strided matrix
1307     * type, rewrite the member type so that it's an array of the proper kind
1308     * of glsl_type.
1309     */
1310    vtn_array_type_rewrite_glsl_type(ctx->type->members[member]);
1311    ctx->fields[member].type = ctx->type->members[member]->type;
1312 }
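/* Worked example (illustrative numbers): a column-major mat2x4 member (two
 * vec4 columns) with MatrixStride 16 takes the else-branch above, giving
 * mat_type->stride = 16, i.e. columns start 16 bytes apart. With RowMajor,
 * the 16-byte stride describes the rows instead, so it is pushed down onto
 * array_element->stride and mat_type->stride inherits the old element
 * stride.
 */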
1313 
1314 static void
1315 struct_packed_decoration_cb(struct vtn_builder *b,
1316                             struct vtn_value *val, int member,
1317                             const struct vtn_decoration *dec, void *void_ctx)
1318 {
1319    vtn_assert(val->type->base_type == vtn_base_type_struct);
1320    if (dec->decoration == SpvDecorationCPacked) {
1321       if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1322          vtn_warn("Decoration only allowed for CL-style kernels: %s",
1323                   spirv_decoration_to_string(dec->decoration));
1324       }
1325       val->type->packed = true;
1326    }
1327 }
1328 
1329 static void
1330 struct_block_decoration_cb(struct vtn_builder *b,
1331                            struct vtn_value *val, int member,
1332                            const struct vtn_decoration *dec, void *ctx)
1333 {
1334    if (member != -1)
1335       return;
1336 
1337    struct vtn_type *type = val->type;
1338    if (dec->decoration == SpvDecorationBlock)
1339       type->block = true;
1340    else if (dec->decoration == SpvDecorationBufferBlock)
1341       type->buffer_block = true;
1342 }
1343 
1344 static void
1345 type_decoration_cb(struct vtn_builder *b,
1346                    struct vtn_value *val, int member,
1347                    const struct vtn_decoration *dec, UNUSED void *ctx)
1348 {
1349    struct vtn_type *type = val->type;
1350 
1351    if (member != -1) {
1352       /* This should have been handled by OpTypeStruct */
1353       assert(val->type->base_type == vtn_base_type_struct);
1354       assert(member >= 0 && member < val->type->length);
1355       return;
1356    }
1357 
1358    switch (dec->decoration) {
1359    case SpvDecorationArrayStride:
1360       vtn_assert(type->base_type == vtn_base_type_array ||
1361                  type->base_type == vtn_base_type_pointer);
1362       break;
1363    case SpvDecorationBlock:
1364       vtn_assert(type->base_type == vtn_base_type_struct);
1365       vtn_assert(type->block);
1366       break;
1367    case SpvDecorationBufferBlock:
1368       vtn_assert(type->base_type == vtn_base_type_struct);
1369       vtn_assert(type->buffer_block);
1370       break;
1371    case SpvDecorationGLSLShared:
1372    case SpvDecorationGLSLPacked:
1373       /* Ignore these, since we get explicit offsets anyway */
1374       break;
1375 
1376    case SpvDecorationRowMajor:
1377    case SpvDecorationColMajor:
1378    case SpvDecorationMatrixStride:
1379    case SpvDecorationBuiltIn:
1380    case SpvDecorationNoPerspective:
1381    case SpvDecorationFlat:
1382    case SpvDecorationPatch:
1383    case SpvDecorationCentroid:
1384    case SpvDecorationSample:
1385    case SpvDecorationExplicitInterpAMD:
1386    case SpvDecorationVolatile:
1387    case SpvDecorationCoherent:
1388    case SpvDecorationNonWritable:
1389    case SpvDecorationNonReadable:
1390    case SpvDecorationUniform:
1391    case SpvDecorationUniformId:
1392    case SpvDecorationLocation:
1393    case SpvDecorationComponent:
1394    case SpvDecorationOffset:
1395    case SpvDecorationXfbBuffer:
1396    case SpvDecorationXfbStride:
1397    case SpvDecorationUserSemantic:
1398       vtn_warn("Decoration only allowed for struct members: %s",
1399                spirv_decoration_to_string(dec->decoration));
1400       break;
1401 
1402    case SpvDecorationStream:
1403       /* We don't need to do anything here, as stream is filled in when
1404        * applying the decoration to a variable; just check that when it is
1405        * not on a struct member, it is on a struct.
1406        */
1407       vtn_assert(type->base_type == vtn_base_type_struct);
1408       break;
1409 
1410    case SpvDecorationRelaxedPrecision:
1411    case SpvDecorationSpecId:
1412    case SpvDecorationInvariant:
1413    case SpvDecorationRestrict:
1414    case SpvDecorationAliased:
1415    case SpvDecorationConstant:
1416    case SpvDecorationIndex:
1417    case SpvDecorationBinding:
1418    case SpvDecorationDescriptorSet:
1419    case SpvDecorationLinkageAttributes:
1420    case SpvDecorationNoContraction:
1421    case SpvDecorationInputAttachmentIndex:
1422       vtn_warn("Decoration not allowed on types: %s",
1423                spirv_decoration_to_string(dec->decoration));
1424       break;
1425 
1426    case SpvDecorationCPacked:
1427       /* Handled when parsing a struct type, nothing to do here. */
1428       break;
1429 
1430    case SpvDecorationSaturatedConversion:
1431    case SpvDecorationFuncParamAttr:
1432    case SpvDecorationFPRoundingMode:
1433    case SpvDecorationFPFastMathMode:
1434    case SpvDecorationAlignment:
1435       vtn_warn("Decoration only allowed for CL-style kernels: %s",
1436                spirv_decoration_to_string(dec->decoration));
1437       break;
1438 
1439    case SpvDecorationUserTypeGOOGLE:
1440       /* User semantic decorations can safely be ignored by the driver. */
1441       break;
1442 
1443    default:
1444       vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1445    }
1446 }
1447 
1448 static unsigned
1449 translate_image_format(struct vtn_builder *b, SpvImageFormat format)
1450 {
1451    switch (format) {
1452    case SpvImageFormatUnknown:      return PIPE_FORMAT_NONE;
1453    case SpvImageFormatRgba32f:      return PIPE_FORMAT_R32G32B32A32_FLOAT;
1454    case SpvImageFormatRgba16f:      return PIPE_FORMAT_R16G16B16A16_FLOAT;
1455    case SpvImageFormatR32f:         return PIPE_FORMAT_R32_FLOAT;
1456    case SpvImageFormatRgba8:        return PIPE_FORMAT_R8G8B8A8_UNORM;
1457    case SpvImageFormatRgba8Snorm:   return PIPE_FORMAT_R8G8B8A8_SNORM;
1458    case SpvImageFormatRg32f:        return PIPE_FORMAT_R32G32_FLOAT;
1459    case SpvImageFormatRg16f:        return PIPE_FORMAT_R16G16_FLOAT;
1460    case SpvImageFormatR11fG11fB10f: return PIPE_FORMAT_R11G11B10_FLOAT;
1461    case SpvImageFormatR16f:         return PIPE_FORMAT_R16_FLOAT;
1462    case SpvImageFormatRgba16:       return PIPE_FORMAT_R16G16B16A16_UNORM;
1463    case SpvImageFormatRgb10A2:      return PIPE_FORMAT_R10G10B10A2_UNORM;
1464    case SpvImageFormatRg16:         return PIPE_FORMAT_R16G16_UNORM;
1465    case SpvImageFormatRg8:          return PIPE_FORMAT_R8G8_UNORM;
1466    case SpvImageFormatR16:          return PIPE_FORMAT_R16_UNORM;
1467    case SpvImageFormatR8:           return PIPE_FORMAT_R8_UNORM;
1468    case SpvImageFormatRgba16Snorm:  return PIPE_FORMAT_R16G16B16A16_SNORM;
1469    case SpvImageFormatRg16Snorm:    return PIPE_FORMAT_R16G16_SNORM;
1470    case SpvImageFormatRg8Snorm:     return PIPE_FORMAT_R8G8_SNORM;
1471    case SpvImageFormatR16Snorm:     return PIPE_FORMAT_R16_SNORM;
1472    case SpvImageFormatR8Snorm:      return PIPE_FORMAT_R8_SNORM;
1473    case SpvImageFormatRgba32i:      return PIPE_FORMAT_R32G32B32A32_SINT;
1474    case SpvImageFormatRgba16i:      return PIPE_FORMAT_R16G16B16A16_SINT;
1475    case SpvImageFormatRgba8i:       return PIPE_FORMAT_R8G8B8A8_SINT;
1476    case SpvImageFormatR32i:         return PIPE_FORMAT_R32_SINT;
1477    case SpvImageFormatRg32i:        return PIPE_FORMAT_R32G32_SINT;
1478    case SpvImageFormatRg16i:        return PIPE_FORMAT_R16G16_SINT;
1479    case SpvImageFormatRg8i:         return PIPE_FORMAT_R8G8_SINT;
1480    case SpvImageFormatR16i:         return PIPE_FORMAT_R16_SINT;
1481    case SpvImageFormatR8i:          return PIPE_FORMAT_R8_SINT;
1482    case SpvImageFormatRgba32ui:     return PIPE_FORMAT_R32G32B32A32_UINT;
1483    case SpvImageFormatRgba16ui:     return PIPE_FORMAT_R16G16B16A16_UINT;
1484    case SpvImageFormatRgba8ui:      return PIPE_FORMAT_R8G8B8A8_UINT;
1485    case SpvImageFormatR32ui:        return PIPE_FORMAT_R32_UINT;
1486    case SpvImageFormatRgb10a2ui:    return PIPE_FORMAT_R10G10B10A2_UINT;
1487    case SpvImageFormatRg32ui:       return PIPE_FORMAT_R32G32_UINT;
1488    case SpvImageFormatRg16ui:       return PIPE_FORMAT_R16G16_UINT;
1489    case SpvImageFormatRg8ui:        return PIPE_FORMAT_R8G8_UINT;
1490    case SpvImageFormatR16ui:        return PIPE_FORMAT_R16_UINT;
1491    case SpvImageFormatR8ui:         return PIPE_FORMAT_R8_UINT;
1492    case SpvImageFormatR64ui:        return PIPE_FORMAT_R64_UINT;
1493    case SpvImageFormatR64i:         return PIPE_FORMAT_R64_SINT;
1494    default:
1495       vtn_fail("Invalid image format: %s (%u)",
1496                spirv_imageformat_to_string(format), format);
1497    }
1498 }
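/* Example (illustrative): translate_image_format(b, SpvImageFormatRgba8)
 * returns PIPE_FORMAT_R8G8B8A8_UNORM, while an enum value missing from the
 * table above takes the vtn_fail() path.
 */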
1499 
1500 static void
1501 validate_image_type_for_sampled_image(struct vtn_builder *b,
1502                                       const struct glsl_type *image_type,
1503                                       const char *operand)
1504 {
1505    /* From OpTypeSampledImage description in SPIR-V 1.6, revision 1:
1506     *
1507     *   Image Type must be an OpTypeImage. It is the type of the image in the
1508     *   combined sampler and image type. It must not have a Dim of
1509     *   SubpassData. Additionally, starting with version 1.6, it must not have
1510     *   a Dim of Buffer.
1511     *
1512     * Same also applies to the type of the Image operand in OpSampledImage.
1513     */
1514 
1515    const enum glsl_sampler_dim dim = glsl_get_sampler_dim(image_type);
1516 
1517    vtn_fail_if(dim == GLSL_SAMPLER_DIM_SUBPASS ||
1518                dim == GLSL_SAMPLER_DIM_SUBPASS_MS,
1519                "%s must not have a Dim of SubpassData.", operand);
1520 
1521    if (dim == GLSL_SAMPLER_DIM_BUF) {
1522       if (b->version >= 0x10600) {
1523          vtn_fail("Starting with SPIR-V 1.6, %s "
1524                   "must not have a Dim of Buffer.", operand);
1525       } else {
1526          vtn_warn("%s should not have a Dim of Buffer.", operand);
1527       }
1528    }
1529 }
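/* Sketch of the invalid pattern this rejects (illustrative assembly, not
 * from this file):
 *
 *    %img  = OpTypeImage %float SubpassData 0 0 0 2 Unknown
 *    %simg = OpTypeSampledImage %img   ; invalid: Dim is SubpassData
 */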
1530 
1531 static void
1532 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
1533                 const uint32_t *w, unsigned count)
1534 {
1535    struct vtn_value *val = NULL;
1536 
1537    /* In order to properly handle forward declarations, we have to defer
1538     * allocation for pointer types.
1539     */
1540    if (opcode != SpvOpTypePointer && opcode != SpvOpTypeForwardPointer) {
1541       val = vtn_push_value(b, w[1], vtn_value_type_type);
1542       vtn_fail_if(val->type != NULL,
1543                   "Only pointers can have forward declarations");
1544       val->type = vtn_zalloc(b, struct vtn_type);
1545       val->type->id = w[1];
1546    }
1547 
1548    switch (opcode) {
1549    case SpvOpTypeVoid:
1550       val->type->base_type = vtn_base_type_void;
1551       val->type->type = glsl_void_type();
1552       break;
1553    case SpvOpTypeBool:
1554       val->type->base_type = vtn_base_type_scalar;
1555       val->type->type = glsl_bool_type();
1556       val->type->length = 1;
1557       break;
1558    case SpvOpTypeInt: {
1559       int bit_size = w[2];
1560       const bool signedness = w[3];
1561       vtn_fail_if(bit_size != 8 && bit_size != 16 &&
1562                   bit_size != 32 && bit_size != 64,
1563                   "Invalid int bit size: %u", bit_size);
1564       val->type->base_type = vtn_base_type_scalar;
1565       val->type->type = signedness ? glsl_intN_t_type(bit_size) :
1566                                      glsl_uintN_t_type(bit_size);
1567       val->type->length = 1;
1568       break;
1569    }
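      /* Illustrative SPIR-V handled by the case above (not from this file):
       *
       *    %int  = OpTypeInt 32 1   ; signed,   becomes glsl_intN_t_type(32)
       *    %uint = OpTypeInt 32 0   ; unsigned, becomes glsl_uintN_t_type(32)
       */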
1570 
1571    case SpvOpTypeFloat: {
1572       int bit_size = w[2];
1573       val->type->base_type = vtn_base_type_scalar;
1574       vtn_fail_if(bit_size != 16 && bit_size != 32 && bit_size != 64,
1575                   "Invalid float bit size: %u", bit_size);
1576       val->type->type = glsl_floatN_t_type(bit_size);
1577       val->type->length = 1;
1578       break;
1579    }
1580 
1581    case SpvOpTypeVector: {
1582       struct vtn_type *base = vtn_get_type(b, w[2]);
1583       unsigned elems = w[3];
1584 
1585       vtn_fail_if(base->base_type != vtn_base_type_scalar,
1586                   "Base type for OpTypeVector must be a scalar");
1587       vtn_fail_if((elems < 2 || elems > 4) && (elems != 8) && (elems != 16),
1588                   "Invalid component count for OpTypeVector");
1589 
1590       val->type->base_type = vtn_base_type_vector;
1591       val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
1592       val->type->length = elems;
1593       val->type->stride = glsl_type_is_boolean(val->type->type)
1594          ? 4 : glsl_get_bit_size(base->type) / 8;
1595       val->type->array_element = base;
1596       break;
1597    }
1598 
1599    case SpvOpTypeMatrix: {
1600       struct vtn_type *base = vtn_get_type(b, w[2]);
1601       unsigned columns = w[3];
1602 
1603       vtn_fail_if(base->base_type != vtn_base_type_vector,
1604                   "Base type for OpTypeMatrix must be a vector");
1605       vtn_fail_if(columns < 2 || columns > 4,
1606                   "Invalid column count for OpTypeMatrix");
1607 
1608       val->type->base_type = vtn_base_type_matrix;
1609       val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
1610                                          glsl_get_vector_elements(base->type),
1611                                          columns);
1612       vtn_fail_if(glsl_type_is_error(val->type->type),
1613                   "Unsupported base type for OpTypeMatrix");
1614       assert(!glsl_type_is_error(val->type->type));
1615       val->type->length = columns;
1616       val->type->array_element = base;
1617       val->type->row_major = false;
1618       val->type->stride = 0;
1619       break;
1620    }
1621 
1622    case SpvOpTypeRuntimeArray:
1623    case SpvOpTypeArray: {
1624       struct vtn_type *array_element = vtn_get_type(b, w[2]);
1625 
1626       if (opcode == SpvOpTypeRuntimeArray) {
1627          /* A length of 0 is used to denote unsized arrays */
1628          val->type->length = 0;
1629       } else {
1630          val->type->length = vtn_constant_uint(b, w[3]);
1631       }
1632 
1633       val->type->base_type = vtn_base_type_array;
1634       val->type->array_element = array_element;
1635 
1636       vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1637       val->type->type = glsl_array_type(array_element->type, val->type->length,
1638                                         val->type->stride);
1639       break;
1640    }
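      /* Illustrative SPIR-V for the array path (not from this file):
       *
       *    %arr = OpTypeArray %float %uint_4
       *    OpDecorate %arr ArrayStride 4
       *
       * An OpTypeRuntimeArray takes the same path with length 0.
       */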
1641 
1642    case SpvOpTypeStruct: {
1643       unsigned num_fields = count - 2;
1644       val->type->base_type = vtn_base_type_struct;
1645       val->type->length = num_fields;
1646       val->type->members = vtn_alloc_array(b, struct vtn_type *, num_fields);
1647       val->type->offsets = vtn_alloc_array(b, unsigned, num_fields);
1648       val->type->packed = false;
1649 
1650       NIR_VLA(struct glsl_struct_field, fields, count);
1651       for (unsigned i = 0; i < num_fields; i++) {
1652          val->type->members[i] = vtn_get_type(b, w[i + 2]);
1653          const char *name = NULL;
1654          for (struct vtn_decoration *dec = val->decoration; dec; dec = dec->next) {
1655             if (dec->scope == VTN_DEC_STRUCT_MEMBER_NAME0 - i) {
1656                name = dec->member_name;
1657                break;
1658             }
1659          }
1660          if (!name)
1661             name = ralloc_asprintf(b, "field%d", i);
1662 
1663          fields[i] = (struct glsl_struct_field) {
1664             .type = val->type->members[i]->type,
1665             .name = name,
1666             .location = -1,
1667             .offset = -1,
1668          };
1669       }
1670 
1671       vtn_foreach_decoration(b, val, struct_packed_decoration_cb, NULL);
1672 
1673       struct member_decoration_ctx ctx = {
1674          .num_fields = num_fields,
1675          .fields = fields,
1676          .type = val->type
1677       };
1678 
1679       vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
1680 
1681       /* Propagate access specifiers that are present on all members to the overall type */
1682       enum gl_access_qualifier overall_access = ACCESS_COHERENT | ACCESS_VOLATILE |
1683                                                 ACCESS_NON_READABLE | ACCESS_NON_WRITEABLE;
1684       for (unsigned i = 0; i < num_fields; ++i)
1685          overall_access &= val->type->members[i]->access;
1686       val->type->access = overall_access;
1687 
1688       vtn_foreach_decoration(b, val, struct_member_matrix_stride_cb, &ctx);
1689 
1690       vtn_foreach_decoration(b, val, struct_block_decoration_cb, NULL);
1691 
1692       const char *name = val->name;
1693 
1694       if (val->type->block || val->type->buffer_block) {
1695          /* Packing will be ignored since types coming from SPIR-V are
1696           * explicitly laid out.
1697           */
1698          val->type->type = glsl_interface_type(fields, num_fields,
1699                                                /* packing */ 0, false,
1700                                                name ? name : "block");
1701       } else {
1702          val->type->type = glsl_struct_type(fields, num_fields,
1703                                             name ? name : "struct",
1704                                             val->type->packed);
1705       }
1706       break;
1707    }
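      /* Illustrative SPIR-V for the struct path (not from this file):
       *
       *    %S = OpTypeStruct %float %v4float
       *    OpMemberDecorate %S 0 Offset 0
       *    OpMemberDecorate %S 1 Offset 16
       */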
1708 
1709    case SpvOpTypeFunction: {
1710       val->type->base_type = vtn_base_type_function;
1711       val->type->type = NULL;
1712 
1713       val->type->return_type = vtn_get_type(b, w[2]);
1714 
1715       const unsigned num_params = count - 3;
1716       val->type->length = num_params;
1717       val->type->params = vtn_alloc_array(b, struct vtn_type *, num_params);
1718       for (unsigned i = 0; i < num_params; i++) {
1719          val->type->params[i] = vtn_get_type(b, w[i + 3]);
1720       }
1721       break;
1722    }
1723 
1724    case SpvOpTypePointer:
1725    case SpvOpTypeForwardPointer: {
1726       /* We can't blindly push the value because it might be a forward
1727        * declaration.
1728        */
1729       val = vtn_untyped_value(b, w[1]);
1730 
1731       SpvStorageClass storage_class = w[2];
1732 
1733       vtn_fail_if(opcode == SpvOpTypeForwardPointer &&
1734                   b->shader->info.stage != MESA_SHADER_KERNEL &&
1735                   storage_class != SpvStorageClassPhysicalStorageBuffer,
1736                   "OpTypeForwardPointer is only allowed in Vulkan with "
1737                   "the PhysicalStorageBuffer storage class");
1738 
1739       struct vtn_type *deref_type = NULL;
1740       if (opcode == SpvOpTypePointer)
1741          deref_type = vtn_get_type(b, w[3]);
1742 
1743       bool has_forward_pointer = false;
1744       if (val->value_type == vtn_value_type_invalid) {
1745          val->value_type = vtn_value_type_type;
1746          val->type = vtn_zalloc(b, struct vtn_type);
1747          val->type->id = w[1];
1748          val->type->base_type = vtn_base_type_pointer;
1749          val->type->storage_class = storage_class;
1750 
1751          /* These can actually be stored to nir_variables and used as SSA
1752           * values so they need a real glsl_type.
1753           */
1754          enum vtn_variable_mode mode = vtn_storage_class_to_mode(
1755             b, storage_class, deref_type, NULL);
1756 
1757          /* The deref type should only matter for the UniformConstant storage
1758           * class.  In particular, it should never matter for any storage
1759           * classes that are allowed in combination with OpTypeForwardPointer.
1760           */
1761          if (storage_class != SpvStorageClassUniform &&
1762              storage_class != SpvStorageClassUniformConstant) {
1763             assert(mode == vtn_storage_class_to_mode(b, storage_class,
1764                                                      NULL, NULL));
1765          }
1766 
1767          val->type->type = nir_address_format_to_glsl_type(
1768             vtn_mode_to_address_format(b, mode));
1769       } else {
1770          vtn_fail_if(val->type->storage_class != storage_class,
1771                      "The storage classes of an OpTypePointer and any "
1772                      "OpTypeForwardPointers that provide forward "
1773                      "declarations of it must match.");
1774          has_forward_pointer = true;
1775       }
1776 
1777       if (opcode == SpvOpTypePointer) {
1778          vtn_fail_if(val->type->deref != NULL,
1779                      "While OpTypeForwardPointer can be used to provide a "
1780                      "forward declaration of a pointer, OpTypePointer can "
1781                      "only be used once for a given id.");
1782 
1783          vtn_fail_if(has_forward_pointer &&
1784                      deref_type->base_type != vtn_base_type_struct,
1785                      "An OpTypePointer instruction must declare "
1786                      "Pointer Type to be a pointer to an OpTypeStruct.");
1787 
1788          val->type->deref = deref_type;
1789 
1790          /* Only certain storage classes use ArrayStride. */
1791          switch (storage_class) {
1792          case SpvStorageClassWorkgroup:
1793             if (!b->options->caps.workgroup_memory_explicit_layout)
1794                break;
1795             FALLTHROUGH;
1796 
1797          case SpvStorageClassUniform:
1798          case SpvStorageClassPushConstant:
1799          case SpvStorageClassStorageBuffer:
1800          case SpvStorageClassPhysicalStorageBuffer:
1801             vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1802             break;
1803 
1804          default:
1805             /* Nothing to do. */
1806             break;
1807          }
1808       }
1809       break;
1810    }
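      /* Illustrative use of a forward declaration (assembly not from this
       * file), e.g. a self-referential struct in PhysicalStorageBuffer:
       *
       *    OpTypeForwardPointer %ptr PhysicalStorageBuffer
       *    %S   = OpTypeStruct %uint %ptr
       *    %ptr = OpTypePointer PhysicalStorageBuffer %S
       */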
1811 
1812    case SpvOpTypeImage: {
1813       val->type->base_type = vtn_base_type_image;
1814 
1815       /* Images are represented in NIR as a scalar SSA value that is the
1816        * result of a deref instruction.  An OpLoad on an OpTypeImage pointer
1817        * from UniformConstant memory just takes the NIR deref from the pointer
1818        * and turns it into an SSA value.
1819        */
1820       val->type->type = nir_address_format_to_glsl_type(
1821          vtn_mode_to_address_format(b, vtn_variable_mode_function));
1822 
1823       const struct vtn_type *sampled_type = vtn_get_type(b, w[2]);
1824       if (b->shader->info.stage == MESA_SHADER_KERNEL) {
1825          vtn_fail_if(sampled_type->base_type != vtn_base_type_void,
1826                      "Sampled type of OpTypeImage must be void for kernels");
1827       } else {
1828          vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar,
1829                      "Sampled type of OpTypeImage must be a scalar");
1830          if (b->options->caps.image_atomic_int64) {
1831             vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32 &&
1832                         glsl_get_bit_size(sampled_type->type) != 64,
1833                         "Sampled type of OpTypeImage must be a 32 or 64-bit "
1834                         "scalar");
1835          } else {
1836             vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32,
1837                         "Sampled type of OpTypeImage must be a 32-bit scalar");
1838          }
1839       }
1840 
1841       enum glsl_sampler_dim dim;
1842       switch ((SpvDim)w[3]) {
1843       case SpvDim1D:       dim = GLSL_SAMPLER_DIM_1D;    break;
1844       case SpvDim2D:       dim = GLSL_SAMPLER_DIM_2D;    break;
1845       case SpvDim3D:       dim = GLSL_SAMPLER_DIM_3D;    break;
1846       case SpvDimCube:     dim = GLSL_SAMPLER_DIM_CUBE;  break;
1847       case SpvDimRect:     dim = GLSL_SAMPLER_DIM_RECT;  break;
1848       case SpvDimBuffer:   dim = GLSL_SAMPLER_DIM_BUF;   break;
1849       case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break;
1850       default:
1851          vtn_fail("Invalid SPIR-V image dimensionality: %s (%u)",
1852                   spirv_dim_to_string((SpvDim)w[3]), w[3]);
1853       }
1854 
1855       /* w[4]: as per Vulkan spec "Validation Rules within a Module",
1856        *       The “Depth” operand of OpTypeImage is ignored.
1857        */
1858       bool is_array = w[5];
1859       bool multisampled = w[6];
1860       unsigned sampled = w[7];
1861       SpvImageFormat format = w[8];
1862 
1863       if (count > 9)
1864          val->type->access_qualifier = w[9];
1865       else if (b->shader->info.stage == MESA_SHADER_KERNEL)
1866          /* Per the CL C spec: If no qualifier is provided, read_only is assumed. */
1867          val->type->access_qualifier = SpvAccessQualifierReadOnly;
1868       else
1869          val->type->access_qualifier = SpvAccessQualifierReadWrite;
1870 
1871       if (multisampled) {
1872          if (dim == GLSL_SAMPLER_DIM_2D)
1873             dim = GLSL_SAMPLER_DIM_MS;
1874          else if (dim == GLSL_SAMPLER_DIM_SUBPASS)
1875             dim = GLSL_SAMPLER_DIM_SUBPASS_MS;
1876          else
1877             vtn_fail("Unsupported multisampled image type");
1878       }
1879 
1880       val->type->image_format = translate_image_format(b, format);
1881 
1882       enum glsl_base_type sampled_base_type =
1883          glsl_get_base_type(sampled_type->type);
1884       if (sampled == 1) {
1885          val->type->glsl_image = glsl_texture_type(dim, is_array,
1886                                                    sampled_base_type);
1887       } else if (sampled == 2) {
1888          val->type->glsl_image = glsl_image_type(dim, is_array,
1889                                                  sampled_base_type);
1890       } else if (b->shader->info.stage == MESA_SHADER_KERNEL) {
1891          val->type->glsl_image = glsl_image_type(dim, is_array,
1892                                                  GLSL_TYPE_VOID);
1893       } else {
1894          vtn_fail("We need to know if the image will be sampled");
1895       }
1896       break;
1897    }
1898 
1899    case SpvOpTypeSampledImage: {
1900       val->type->base_type = vtn_base_type_sampled_image;
1901       val->type->image = vtn_get_type(b, w[2]);
1902 
1903       validate_image_type_for_sampled_image(
1904          b, val->type->image->glsl_image,
1905          "Image Type operand of OpTypeSampledImage");
1906 
1907       /* Sampled images are represented in NIR as a vec2 SSA value where each
1908        * component is the result of a deref instruction.  The first component
1909        * is the image and the second is the sampler.  An OpLoad on an
1910        * OpTypeSampledImage pointer from UniformConstant memory just takes
1911        * the NIR deref from the pointer and duplicates it to both vector
1912        * components.
1913        */
1914       nir_address_format addr_format =
1915          vtn_mode_to_address_format(b, vtn_variable_mode_function);
1916       assert(nir_address_format_num_components(addr_format) == 1);
1917       unsigned bit_size = nir_address_format_bit_size(addr_format);
1918       assert(bit_size == 32 || bit_size == 64);
1919 
1920       enum glsl_base_type base_type =
1921          bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64;
1922       val->type->type = glsl_vector_type(base_type, 2);
1923       break;
1924    }
1925 
1926    case SpvOpTypeSampler:
1927       val->type->base_type = vtn_base_type_sampler;
1928 
1929       /* Samplers are represented in NIR as a scalar SSA value that is the
1930        * result of a deref instruction.  An OpLoad on an OpTypeSampler pointer
1931        * from UniformConstant memory just takes the NIR deref from the pointer
1932        * and turns it into an SSA value.
1933        */
1934       val->type->type = nir_address_format_to_glsl_type(
1935          vtn_mode_to_address_format(b, vtn_variable_mode_function));
1936       break;
1937 
1938    case SpvOpTypeAccelerationStructureKHR:
1939       val->type->base_type = vtn_base_type_accel_struct;
1940       val->type->type = glsl_uint64_t_type();
1941       break;
1942 
1943 
1944    case SpvOpTypeOpaque: {
1945       val->type->base_type = vtn_base_type_struct;
1946       const char *name = vtn_string_literal(b, &w[2], count - 2, NULL);
1947       val->type->type = glsl_struct_type(NULL, 0, name, false);
1948       break;
1949    }
1950 
1951    case SpvOpTypeRayQueryKHR: {
1952       val->type->base_type = vtn_base_type_ray_query;
1953       val->type->type = glsl_uint64_t_type();
1954       /* We may need to run queries on helper invocations. Here the parser
1955        * doesn't go through a deeper analysis on whether the result of a query
1956        * will be used in derivative instructions.
1957        *
1958        * An implementation willing to optimize this would look through the IR
1959        * and check if any derivative instruction uses the result of a query
1960        * and drop this flag if not.
1961        */
1962       if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
1963          val->type->access = ACCESS_INCLUDE_HELPERS;
1964       break;
1965    }
1966 
1967    case SpvOpTypeCooperativeMatrixKHR:
1968       vtn_handle_cooperative_type(b, val, opcode, w, count);
1969       break;
1970 
1971    case SpvOpTypeEvent:
1972       val->type->base_type = vtn_base_type_event;
1973       /*
1974        * This makes the event type compatible with the pointer size due to LLVM 16.
1975        * LLVM 17 fixes this properly, but with LLVM 16 and opaque pointers it is still wrong.
1976        */
1977       val->type->type = b->shader->info.cs.ptr_size == 64 ? glsl_int64_t_type() : glsl_int_type();
1978       break;
1979 
1980    case SpvOpTypeDeviceEvent:
1981    case SpvOpTypeReserveId:
1982    case SpvOpTypeQueue:
1983    case SpvOpTypePipe:
1984    default:
1985       vtn_fail_with_opcode("Unhandled opcode", opcode);
1986    }
1987 
1988    vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
1989 
1990    if (val->type->base_type == vtn_base_type_struct &&
1991        (val->type->block || val->type->buffer_block)) {
1992       for (unsigned i = 0; i < val->type->length; i++) {
1993          vtn_fail_if(vtn_type_contains_block(b, val->type->members[i]),
1994                      "Block and BufferBlock decorations cannot decorate a "
1995                      "structure type that is nested at any level inside "
1996                      "another structure type decorated with Block or "
1997                      "BufferBlock.");
1998       }
1999    }
2000 }
2001 
2002 static nir_constant *
2003 vtn_null_constant(struct vtn_builder *b, struct vtn_type *type)
2004 {
2005    nir_constant *c = rzalloc(b, nir_constant);
2006 
2007    switch (type->base_type) {
2008    case vtn_base_type_scalar:
2009    case vtn_base_type_vector:
2010       c->is_null_constant = true;
2011       /* Nothing to do here.  It's already initialized to zero */
2012       break;
2013 
2014    case vtn_base_type_pointer: {
2015       enum vtn_variable_mode mode = vtn_storage_class_to_mode(
2016          b, type->storage_class, type->deref, NULL);
2017       nir_address_format addr_format = vtn_mode_to_address_format(b, mode);
2018 
2019       const nir_const_value *null_value = nir_address_format_null_value(addr_format);
2020       memcpy(c->values, null_value,
2021              sizeof(nir_const_value) * nir_address_format_num_components(addr_format));
2022       break;
2023    }
2024 
2025    case vtn_base_type_void:
2026    case vtn_base_type_image:
2027    case vtn_base_type_sampler:
2028    case vtn_base_type_sampled_image:
2029    case vtn_base_type_function:
2030    case vtn_base_type_event:
2031       /* For these we have to return something, but it doesn't matter what. */
2032       break;
2033 
2034    case vtn_base_type_matrix:
2035    case vtn_base_type_array:
2036       vtn_assert(type->length > 0);
2037       c->is_null_constant = true;
2038       c->num_elements = type->length;
2039       c->elements = ralloc_array(b, nir_constant *, c->num_elements);
2040 
2041       c->elements[0] = vtn_null_constant(b, type->array_element);
2042       for (unsigned i = 1; i < c->num_elements; i++)
2043          c->elements[i] = c->elements[0];
2044       break;
2045 
2046    case vtn_base_type_struct:
2047       c->is_null_constant = true;
2048       c->num_elements = type->length;
2049       c->elements = ralloc_array(b, nir_constant *, c->num_elements);
2050       for (unsigned i = 0; i < c->num_elements; i++)
2051          c->elements[i] = vtn_null_constant(b, type->members[i]);
2052       break;
2053 
2054    default:
2055       vtn_fail("Invalid type for null constant");
2056    }
2057 
2058    return c;
2059 }
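/* Sketch of the recursion above (illustrative): for
 *
 *    %S = OpTypeStruct %float %v4float
 *
 * OpConstantNull %S yields a nir_constant with two elements, each of which
 * is itself a zeroed null constant.
 */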
2060 
2061 static void
2062 spec_constant_decoration_cb(struct vtn_builder *b, UNUSED struct vtn_value *val,
2063                             ASSERTED int member,
2064                             const struct vtn_decoration *dec, void *data)
2065 {
2066    vtn_assert(member == -1);
2067    if (dec->decoration != SpvDecorationSpecId)
2068       return;
2069 
2070    nir_const_value *value = data;
2071    for (unsigned i = 0; i < b->num_specializations; i++) {
2072       if (b->specializations[i].id == dec->operands[0]) {
2073          *value = b->specializations[i].value;
2074          return;
2075       }
2076    }
2077 }
2078 
2079 static void
2080 handle_workgroup_size_decoration_cb(struct vtn_builder *b,
2081                                     struct vtn_value *val,
2082                                     ASSERTED int member,
2083                                     const struct vtn_decoration *dec,
2084                                     UNUSED void *data)
2085 {
2086    vtn_assert(member == -1);
2087    if (dec->decoration != SpvDecorationBuiltIn ||
2088        dec->operands[0] != SpvBuiltInWorkgroupSize)
2089       return;
2090 
2091    vtn_assert(val->type->type == glsl_vector_type(GLSL_TYPE_UINT, 3));
2092    b->workgroup_size_builtin = val;
2093 }
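/* Triggered by modules such as (illustrative assembly, not from this file):
 *
 *    OpDecorate %wgsize BuiltIn WorkgroupSize
 *    %wgsize = OpConstantComposite %v3uint %uint_8 %uint_8 %uint_1
 */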
2094 
2095 static void
2096 vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
2097                     const uint32_t *w, unsigned count)
2098 {
2099    struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
2100    val->constant = rzalloc(b, nir_constant);
2101    switch (opcode) {
2102    case SpvOpConstantTrue:
2103    case SpvOpConstantFalse:
2104    case SpvOpSpecConstantTrue:
2105    case SpvOpSpecConstantFalse: {
2106       vtn_fail_if(val->type->type != glsl_bool_type(),
2107                   "Result type of %s must be OpTypeBool",
2108                   spirv_op_to_string(opcode));
2109 
2110       bool bval = (opcode == SpvOpConstantTrue ||
2111                    opcode == SpvOpSpecConstantTrue);
2112 
2113       nir_const_value u32val = nir_const_value_for_uint(bval, 32);
2114 
2115       if (opcode == SpvOpSpecConstantTrue ||
2116           opcode == SpvOpSpecConstantFalse)
2117          vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32val);
2118 
2119       val->constant->values[0].b = u32val.u32 != 0;
2120       break;
2121    }
2122 
2123    case SpvOpConstant:
2124    case SpvOpSpecConstant: {
2125       vtn_fail_if(val->type->base_type != vtn_base_type_scalar,
2126                   "Result type of %s must be a scalar",
2127                   spirv_op_to_string(opcode));
2128       int bit_size = glsl_get_bit_size(val->type->type);
2129       switch (bit_size) {
2130       case 64:
2131          val->constant->values[0].u64 = vtn_u64_literal(&w[3]);
2132          break;
2133       case 32:
2134          val->constant->values[0].u32 = w[3];
2135          break;
2136       case 16:
2137          val->constant->values[0].u16 = w[3];
2138          break;
2139       case 8:
2140          val->constant->values[0].u8 = w[3];
2141          break;
2142       default:
2143          vtn_fail("Unsupported SpvOpConstant bit size: %u", bit_size);
2144       }
2145 
2146       if (opcode == SpvOpSpecConstant)
2147          vtn_foreach_decoration(b, val, spec_constant_decoration_cb,
2148                                 &val->constant->values[0]);
2149       break;
2150    }
2151 
2152    case SpvOpSpecConstantComposite:
2153    case SpvOpConstantComposite: {
2154       unsigned elem_count = count - 3;
2155       unsigned expected_length = val->type->base_type == vtn_base_type_cooperative_matrix ?
2156          1 : val->type->length;
2157       vtn_fail_if(elem_count != expected_length,
2158                   "%s has %u constituents, expected %u",
2159                   spirv_op_to_string(opcode), elem_count, expected_length);
2160 
2161       nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
2162       val->is_undef_constant = true;
2163       for (unsigned i = 0; i < elem_count; i++) {
2164          struct vtn_value *elem_val = vtn_untyped_value(b, w[i + 3]);
2165 
2166          if (elem_val->value_type == vtn_value_type_constant) {
2167             elems[i] = elem_val->constant;
2168             val->is_undef_constant = val->is_undef_constant &&
2169                                      elem_val->is_undef_constant;
2170          } else {
2171             vtn_fail_if(elem_val->value_type != vtn_value_type_undef,
2172                         "only constants or undefs allowed for "
2173                         "SpvOpConstantComposite");
2174             /* to make it easier, just insert a NULL constant for now */
2175             elems[i] = vtn_null_constant(b, elem_val->type);
2176          }
2177       }
2178 
2179       switch (val->type->base_type) {
2180       case vtn_base_type_vector: {
2181          assert(glsl_type_is_vector(val->type->type));
2182          for (unsigned i = 0; i < elem_count; i++)
2183             val->constant->values[i] = elems[i]->values[0];
2184          break;
2185       }
2186 
2187       case vtn_base_type_matrix:
2188       case vtn_base_type_struct:
2189       case vtn_base_type_array:
2190          ralloc_steal(val->constant, elems);
2191          val->constant->num_elements = elem_count;
2192          val->constant->elements = elems;
2193          break;
2194 
2195       case vtn_base_type_cooperative_matrix:
2196          val->constant->values[0] = elems[0]->values[0];
2197          break;
2198 
2199       default:
2200          vtn_fail("Result type of %s must be a composite type",
2201                   spirv_op_to_string(opcode));
2202       }
2203       break;
2204    }
2205 
2206    case SpvOpSpecConstantOp: {
2207       nir_const_value u32op = nir_const_value_for_uint(w[3], 32);
2208       vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32op);
2209       SpvOp opcode = u32op.u32;
2210       switch (opcode) {
2211       case SpvOpVectorShuffle: {
2212          struct vtn_value *v0 = &b->values[w[4]];
2213          struct vtn_value *v1 = &b->values[w[5]];
2214 
2215          vtn_assert(v0->value_type == vtn_value_type_constant ||
2216                     v0->value_type == vtn_value_type_undef);
2217          vtn_assert(v1->value_type == vtn_value_type_constant ||
2218                     v1->value_type == vtn_value_type_undef);
2219 
2220          unsigned len0 = glsl_get_vector_elements(v0->type->type);
2221          unsigned len1 = glsl_get_vector_elements(v1->type->type);
2222 
2223          vtn_assert(len0 + len1 < 16);
2224 
2225          unsigned bit_size = glsl_get_bit_size(val->type->type);
2226          unsigned bit_size0 = glsl_get_bit_size(v0->type->type);
2227          unsigned bit_size1 = glsl_get_bit_size(v1->type->type);
2228 
2229          vtn_assert(bit_size == bit_size0 && bit_size == bit_size1);
2230          (void)bit_size0; (void)bit_size1;
2231 
2232          nir_const_value undef = { .u64 = 0xdeadbeefdeadbeef };
2233          nir_const_value combined[NIR_MAX_VEC_COMPONENTS * 2];
2234 
2235          if (v0->value_type == vtn_value_type_constant) {
2236             for (unsigned i = 0; i < len0; i++)
2237                combined[i] = v0->constant->values[i];
2238          }
2239          if (v1->value_type == vtn_value_type_constant) {
2240             for (unsigned i = 0; i < len1; i++)
2241                combined[len0 + i] = v1->constant->values[i];
2242          }
2243 
2244          for (unsigned i = 0, j = 0; i < count - 6; i++, j++) {
2245             uint32_t comp = w[i + 6];
2246             if (comp == (uint32_t)-1) {
2247                /* If component is not used, set the value to a known constant
2248                 * to detect if it is wrongly used.
2249                 */
2250                val->constant->values[j] = undef;
2251             } else {
2252                vtn_fail_if(comp >= len0 + len1,
2253                            "All Component literals must either be FFFFFFFF "
2254                            "or in [0, N - 1] (inclusive).");
2255                val->constant->values[j] = combined[comp];
2256             }
2257          }
2258          break;
2259       }
2260 
2261       case SpvOpCompositeExtract:
2262       case SpvOpCompositeInsert: {
2263          struct vtn_value *comp;
2264          unsigned deref_start;
2265          struct nir_constant **c;
2266          if (opcode == SpvOpCompositeExtract) {
2267             comp = vtn_value(b, w[4], vtn_value_type_constant);
2268             deref_start = 5;
2269             c = &comp->constant;
2270          } else {
2271             comp = vtn_value(b, w[5], vtn_value_type_constant);
2272             deref_start = 6;
2273             val->constant = nir_constant_clone(comp->constant,
2274                                                (nir_variable *)b);
2275             c = &val->constant;
2276          }
2277 
2278          int elem = -1;
2279          const struct vtn_type *type = comp->type;
2280          for (unsigned i = deref_start; i < count; i++) {
2281             vtn_fail_if(w[i] > type->length,
2282                         "%uth index of %s is %u but the type has only "
2283                         "%u elements", i - deref_start,
2284                         spirv_op_to_string(opcode), w[i], type->length);
2285 
2286             switch (type->base_type) {
2287             case vtn_base_type_vector:
2288                elem = w[i];
2289                type = type->array_element;
2290                break;
2291 
2292             case vtn_base_type_matrix:
2293             case vtn_base_type_array:
2294                c = &(*c)->elements[w[i]];
2295                type = type->array_element;
2296                break;
2297 
2298             case vtn_base_type_struct:
2299                c = &(*c)->elements[w[i]];
2300                type = type->members[w[i]];
2301                break;
2302 
2303             default:
2304                vtn_fail("%s must only index into composite types",
2305                         spirv_op_to_string(opcode));
2306             }
2307          }
2308 
2309          if (opcode == SpvOpCompositeExtract) {
2310             if (elem == -1) {
2311                val->constant = *c;
2312             } else {
2313                unsigned num_components = type->length;
2314                for (unsigned i = 0; i < num_components; i++)
2315                   val->constant->values[i] = (*c)->values[elem + i];
2316             }
2317          } else {
2318             struct vtn_value *insert =
2319                vtn_value(b, w[4], vtn_value_type_constant);
2320             vtn_assert(insert->type == type);
2321             if (elem == -1) {
2322                *c = insert->constant;
2323             } else {
2324                unsigned num_components = type->length;
2325                for (unsigned i = 0; i < num_components; i++)
2326                   (*c)->values[elem + i] = insert->constant->values[i];
2327             }
2328          }
2329          break;
2330       }
2331 
2332       default: {
2333          bool swap;
2334          nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(val->type->type);
2335          nir_alu_type src_alu_type = dst_alu_type;
2336          unsigned num_components = glsl_get_vector_elements(val->type->type);
2337          unsigned bit_size;
2338 
2339          vtn_assert(count <= 7);
2340 
2341          switch (opcode) {
2342          case SpvOpSConvert:
2343          case SpvOpFConvert:
2344          case SpvOpUConvert:
2345             /* We have a source in a conversion */
2346             src_alu_type =
2347                nir_get_nir_type_for_glsl_type(vtn_get_value_type(b, w[4])->type);
2348             /* We use the bitsize of the conversion source to evaluate the opcode later */
2349             bit_size = glsl_get_bit_size(vtn_get_value_type(b, w[4])->type);
2350             break;
2351          default:
2352             bit_size = glsl_get_bit_size(val->type->type);
2353          }
2354 
2355          bool exact;
2356          nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact,
2357                                                      nir_alu_type_get_type_size(src_alu_type),
2358                                                      nir_alu_type_get_type_size(dst_alu_type));
2359 
2360          /* No SPIR-V opcodes handled through this path should set exact.
2361           * Since it is ignored, assert on it.
2362           */
2363          assert(!exact);
2364 
2365          nir_const_value src[3][NIR_MAX_VEC_COMPONENTS];
2366 
2367          for (unsigned i = 0; i < count - 4; i++) {
2368             struct vtn_value *src_val =
2369                vtn_value(b, w[4 + i], vtn_value_type_constant);
2370 
2371             /* If this is an unsized source, pull the bit size from the
2372              * source; otherwise, we'll use the bit size from the destination.
2373              */
2374             if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]))
2375                bit_size = glsl_get_bit_size(src_val->type->type);
2376 
2377             unsigned src_comps = nir_op_infos[op].input_sizes[i] ?
2378                                  nir_op_infos[op].input_sizes[i] :
2379                                  num_components;
2380 
2381             unsigned j = swap ? 1 - i : i;
2382             for (unsigned c = 0; c < src_comps; c++)
2383                src[j][c] = src_val->constant->values[c];
2384          }
2385 
2386          /* fix up fixed size sources */
2387          switch (op) {
2388          case nir_op_ishl:
2389          case nir_op_ishr:
2390          case nir_op_ushr: {
2391             if (bit_size == 32)
2392                break;
2393             for (unsigned i = 0; i < num_components; ++i) {
2394                switch (bit_size) {
2395                case 64: src[1][i].u32 = src[1][i].u64; break;
2396                case 16: src[1][i].u32 = src[1][i].u16; break;
2397                case  8: src[1][i].u32 = src[1][i].u8;  break;
2398                }
2399             }
2400             break;
2401          }
2402          default:
2403             break;
2404          }
2405 
2406          nir_const_value *srcs[3] = {
2407             src[0], src[1], src[2],
2408          };
2409          nir_eval_const_opcode(op, val->constant->values,
2410                                num_components, bit_size, srcs,
2411                                b->shader->info.float_controls_execution_mode);
2412          break;
2413       } /* default */
2414       }
2415       break;
2416    }
2417 
2418    case SpvOpConstantNull:
2419       val->constant = vtn_null_constant(b, val->type);
2420       val->is_null_constant = true;
2421       break;
2422 
2423    default:
2424       vtn_fail_with_opcode("Unhandled opcode", opcode);
2425    }
2426 
2427    /* Now that we have the value, update the workgroup size if needed */
2428    if (gl_shader_stage_uses_workgroup(b->entry_point_stage))
2429       vtn_foreach_decoration(b, val, handle_workgroup_size_decoration_cb,
2430                              NULL);
2431 }
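/* Example of the specialization flow above (illustrative): for
 *
 *    OpDecorate %n SpecId 3
 *    %n = OpSpecConstant %uint 42
 *
 * the default value 42 is kept unless the client passed a specialization
 * entry with id == 3, in which case spec_constant_decoration_cb() replaces
 * it with the client-provided value.
 */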
2432 
2433 static void
2434 vtn_split_barrier_semantics(struct vtn_builder *b,
2435                             SpvMemorySemanticsMask semantics,
2436                             SpvMemorySemanticsMask *before,
2437                             SpvMemorySemanticsMask *after)
2438 {
2439    /* For memory semantics embedded in operations, we split them into up to
2440     * two barriers, to be added before and after the operation.  This is less
2441     * strict than propagating them all the way to the final backend stage,
2442     * but it still results in correct execution.
2443     *
2444     * A further improvement would be to pipe this information (and use it!)
2445     * through the next compiler layers, at the expense of making the handling
2446     * of barriers more complicated.
2447     */
2448 
2449    *before = SpvMemorySemanticsMaskNone;
2450    *after = SpvMemorySemanticsMaskNone;
2451 
2452    SpvMemorySemanticsMask order_semantics =
2453       semantics & (SpvMemorySemanticsAcquireMask |
2454                    SpvMemorySemanticsReleaseMask |
2455                    SpvMemorySemanticsAcquireReleaseMask |
2456                    SpvMemorySemanticsSequentiallyConsistentMask);
2457 
2458    if (util_bitcount(order_semantics) > 1) {
2459       /* Old GLSLang versions incorrectly set all the ordering bits.  This was
2460        * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2461        * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2462        */
2463       vtn_warn("Multiple memory ordering semantics specified, "
2464                "assuming AcquireRelease.");
2465       order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2466    }
2467 
2468    const SpvMemorySemanticsMask av_vis_semantics =
2469       semantics & (SpvMemorySemanticsMakeAvailableMask |
2470                    SpvMemorySemanticsMakeVisibleMask);
2471 
2472    const SpvMemorySemanticsMask storage_semantics =
2473       semantics & (SpvMemorySemanticsUniformMemoryMask |
2474                    SpvMemorySemanticsSubgroupMemoryMask |
2475                    SpvMemorySemanticsWorkgroupMemoryMask |
2476                    SpvMemorySemanticsCrossWorkgroupMemoryMask |
2477                    SpvMemorySemanticsAtomicCounterMemoryMask |
2478                    SpvMemorySemanticsImageMemoryMask |
2479                    SpvMemorySemanticsOutputMemoryMask);
2480 
2481    const SpvMemorySemanticsMask other_semantics =
2482       semantics & ~(order_semantics | av_vis_semantics | storage_semantics |
2483                     SpvMemorySemanticsVolatileMask);
2484 
2485    if (other_semantics)
2486       vtn_warn("Ignoring unhandled memory semantics: %u\n", other_semantics);
2487 
2488    /* SequentiallyConsistent is treated as AcquireRelease. */
2489 
2490    /* The RELEASE barrier happens BEFORE the operation, and it is usually
2491     * associated with a Store.  All the write operations with a matching
2492     * semantics will not be reordered after the Store.
2493     */
2494    if (order_semantics & (SpvMemorySemanticsReleaseMask |
2495                           SpvMemorySemanticsAcquireReleaseMask |
2496                           SpvMemorySemanticsSequentiallyConsistentMask)) {
2497       *before |= SpvMemorySemanticsReleaseMask | storage_semantics;
2498    }
2499 
2500    /* The ACQUIRE barrier happens AFTER the operation, and it is usually
2501     * associated with a Load.  All the operations with a matching semantics
2502     * will not be reordered before the Load.
2503     */
2504    if (order_semantics & (SpvMemorySemanticsAcquireMask |
2505                           SpvMemorySemanticsAcquireReleaseMask |
2506                           SpvMemorySemanticsSequentiallyConsistentMask)) {
2507       *after |= SpvMemorySemanticsAcquireMask | storage_semantics;
2508    }
2509 
2510    if (av_vis_semantics & SpvMemorySemanticsMakeVisibleMask)
2511       *before |= SpvMemorySemanticsMakeVisibleMask | storage_semantics;
2512 
2513    if (av_vis_semantics & SpvMemorySemanticsMakeAvailableMask)
2514       *after |= SpvMemorySemanticsMakeAvailableMask | storage_semantics;
2515 }
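/* Example (illustrative): an atomic carrying
 * AcquireRelease | UniformMemory semantics is split by the function above
 * into
 *
 *    *before = Release | UniformMemory
 *    *after  = Acquire | UniformMemory
 */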
2516 
2517 static nir_memory_semantics
2518 vtn_mem_semantics_to_nir_mem_semantics(struct vtn_builder *b,
2519                                        SpvMemorySemanticsMask semantics)
2520 {
2521    nir_memory_semantics nir_semantics = 0;
2522 
2523    SpvMemorySemanticsMask order_semantics =
2524       semantics & (SpvMemorySemanticsAcquireMask |
2525                    SpvMemorySemanticsReleaseMask |
2526                    SpvMemorySemanticsAcquireReleaseMask |
2527                    SpvMemorySemanticsSequentiallyConsistentMask);
2528 
2529    if (util_bitcount(order_semantics) > 1) {
2530       /* Old GLSLang versions incorrectly set all the ordering bits.  This was
2531        * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2532        * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2533        */
2534       vtn_warn("Multiple memory ordering semantics bits specified, "
2535                "assuming AcquireRelease.");
2536       order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2537    }
2538 
2539    switch (order_semantics) {
2540    case 0:
2541       /* Not an ordering barrier. */
2542       break;
2543 
2544    case SpvMemorySemanticsAcquireMask:
2545       nir_semantics = NIR_MEMORY_ACQUIRE;
2546       break;
2547 
2548    case SpvMemorySemanticsReleaseMask:
2549       nir_semantics = NIR_MEMORY_RELEASE;
2550       break;
2551 
2552    case SpvMemorySemanticsSequentiallyConsistentMask:
2553       FALLTHROUGH; /* Treated as AcquireRelease in Vulkan. */
2554    case SpvMemorySemanticsAcquireReleaseMask:
2555       nir_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE;
2556       break;
2557 
2558    default:
2559       unreachable("Invalid memory order semantics");
2560    }
2561 
2562    if (semantics & SpvMemorySemanticsMakeAvailableMask) {
2563       vtn_fail_if(!b->options->caps.vk_memory_model,
2564                   "To use MakeAvailable memory semantics the VulkanMemoryModel "
2565                   "capability must be declared.");
2566       nir_semantics |= NIR_MEMORY_MAKE_AVAILABLE;
2567    }
2568 
2569    if (semantics & SpvMemorySemanticsMakeVisibleMask) {
2570       vtn_fail_if(!b->options->caps.vk_memory_model,
2571                   "To use MakeVisible memory semantics the VulkanMemoryModel "
2572                   "capability must be declared.");
2573       nir_semantics |= NIR_MEMORY_MAKE_VISIBLE;
2574    }
2575 
2576    return nir_semantics;
2577 }
2578 
2579 static nir_variable_mode
2580 vtn_mem_semantics_to_nir_var_modes(struct vtn_builder *b,
2581                                    SpvMemorySemanticsMask semantics)
2582 {
2583    /* Vulkan Environment for SPIR-V says "SubgroupMemory, CrossWorkgroupMemory,
2584     * and AtomicCounterMemory are ignored".
2585     */
2586    if (b->options->environment == NIR_SPIRV_VULKAN) {
2587       semantics &= ~(SpvMemorySemanticsSubgroupMemoryMask |
2588                      SpvMemorySemanticsCrossWorkgroupMemoryMask |
2589                      SpvMemorySemanticsAtomicCounterMemoryMask);
2590    }
2591 
2592    nir_variable_mode modes = 0;
2593    if (semantics & SpvMemorySemanticsUniformMemoryMask)
2594       modes |= nir_var_mem_ssbo | nir_var_mem_global;
2595    if (semantics & SpvMemorySemanticsImageMemoryMask)
2596       modes |= nir_var_image;
2597    if (semantics & SpvMemorySemanticsWorkgroupMemoryMask)
2598       modes |= nir_var_mem_shared;
2599    if (semantics & SpvMemorySemanticsCrossWorkgroupMemoryMask)
2600       modes |= nir_var_mem_global;
2601    if (semantics & SpvMemorySemanticsOutputMemoryMask) {
2602       modes |= nir_var_shader_out;
2603 
2604       if (b->shader->info.stage == MESA_SHADER_TASK)
2605          modes |= nir_var_mem_task_payload;
2606    }
2607 
2608    if (semantics & SpvMemorySemanticsAtomicCounterMemoryMask) {
2609       /* There's no nir_var_atomic_counter, but since atomic counters are
2610        * lowered to SSBOs, we use nir_var_mem_ssbo instead.
2611        */
2612       modes |= nir_var_mem_ssbo;
2613    }
2614 
2615    return modes;
2616 }
2617 
2618 mesa_scope
2619 vtn_translate_scope(struct vtn_builder *b, SpvScope scope)
2620 {
2621    switch (scope) {
2622    case SpvScopeDevice:
2623       vtn_fail_if(b->options->caps.vk_memory_model &&
2624                   !b->options->caps.vk_memory_model_device_scope,
2625                   "If the Vulkan memory model is declared and any instruction "
2626                   "uses Device scope, the VulkanMemoryModelDeviceScope "
2627                   "capability must be declared.");
2628       return SCOPE_DEVICE;
2629 
2630    case SpvScopeQueueFamily:
2631       vtn_fail_if(!b->options->caps.vk_memory_model,
2632                   "To use Queue Family scope, the VulkanMemoryModel capability "
2633                   "must be declared.");
2634       return SCOPE_QUEUE_FAMILY;
2635 
2636    case SpvScopeWorkgroup:
2637       return SCOPE_WORKGROUP;
2638 
2639    case SpvScopeSubgroup:
2640       return SCOPE_SUBGROUP;
2641 
2642    case SpvScopeInvocation:
2643       return SCOPE_INVOCATION;
2644 
2645    case SpvScopeShaderCallKHR:
2646       return SCOPE_SHADER_CALL;
2647 
2648    default:
2649       vtn_fail("Invalid memory scope");
2650    }
2651 }
2652 
2653 static void
2654 vtn_emit_scoped_control_barrier(struct vtn_builder *b, SpvScope exec_scope,
2655                                 SpvScope mem_scope,
2656                                 SpvMemorySemanticsMask semantics)
2657 {
2658    nir_memory_semantics nir_semantics =
2659       vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2660    nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2661    mesa_scope nir_exec_scope = vtn_translate_scope(b, exec_scope);
2662 
2663    /* Memory semantics is optional for OpControlBarrier. */
2664    mesa_scope nir_mem_scope;
2665    if (nir_semantics == 0 || modes == 0)
2666       nir_mem_scope = SCOPE_NONE;
2667    else
2668       nir_mem_scope = vtn_translate_scope(b, mem_scope);
2669 
2670    nir_barrier(&b->nb, .execution_scope=nir_exec_scope, .memory_scope=nir_mem_scope,
2671                        .memory_semantics=nir_semantics, .memory_modes=modes);
2672 }
2673 
2674 void
2675 vtn_emit_memory_barrier(struct vtn_builder *b, SpvScope scope,
2676                         SpvMemorySemanticsMask semantics)
2677 {
2678    nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2679    nir_memory_semantics nir_semantics =
2680       vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2681 
2682    /* No barrier to add. */
2683    if (nir_semantics == 0 || modes == 0)
2684       return;
2685 
2686    nir_barrier(&b->nb, .memory_scope=vtn_translate_scope(b, scope),
2687                        .memory_semantics=nir_semantics,
2688                        .memory_modes=modes);
2689 }
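/* Example (illustrative): OpMemoryBarrier with Workgroup scope and
 * AcquireRelease | WorkgroupMemory semantics becomes roughly
 *
 *    nir_barrier(&b->nb, .memory_scope = SCOPE_WORKGROUP,
 *                .memory_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,
 *                .memory_modes = nir_var_mem_shared);
 */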
2690 
2691 struct vtn_ssa_value *
2692 vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
2693 {
2694    /* Always use bare types for SSA values for a couple of reasons:
2695     *
2696     *  1. Code which emits deref chains should never listen to the explicit
2697     *     layout information on the SSA value if any exists.  If we've
2698     *     accidentally been relying on this, we want to find those bugs.
2699     *
2700     *  2. We want to be able to quickly check that an SSA value being assigned
2701     *     to a SPIR-V value has the right type.  Using bare types everywhere
2702     *     ensures that we can pointer-compare.
2703     */
2704    struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
2705    val->type = glsl_get_bare_type(type);
2706 
2707 
2708    if (!glsl_type_is_vector_or_scalar(type)) {
2709       unsigned elems = glsl_get_length(val->type);
2710       val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
2711       if (glsl_type_is_array_or_matrix(type) || glsl_type_is_cmat(type)) {
2712          const struct glsl_type *elem_type = glsl_get_array_element(type);
2713          for (unsigned i = 0; i < elems; i++)
2714             val->elems[i] = vtn_create_ssa_value(b, elem_type);
2715       } else {
2716          vtn_assert(glsl_type_is_struct_or_ifc(type));
2717          for (unsigned i = 0; i < elems; i++) {
2718             const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
2719             val->elems[i] = vtn_create_ssa_value(b, elem_type);
2720          }
2721       }
2722    }
2723 
2724    return val;
2725 }
2726 
2727 void
2728 vtn_set_ssa_value_var(struct vtn_builder *b, struct vtn_ssa_value *ssa, nir_variable *var)
2729 {
2730    vtn_assert(glsl_type_is_cmat(var->type));
2731    vtn_assert(var->type == ssa->type);
2732    ssa->is_variable = true;
2733    ssa->var = var;
2734 }
2735 
2736 static nir_tex_src
2737 vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
2738 {
2739    return nir_tex_src_for_ssa(type, vtn_get_nir_ssa(b, index));
2740 }
2741 
2742 static uint32_t
2743 image_operand_arg(struct vtn_builder *b, const uint32_t *w, uint32_t count,
2744                   uint32_t mask_idx, SpvImageOperandsMask op)
2745 {
2746    static const SpvImageOperandsMask ops_with_arg =
2747       SpvImageOperandsBiasMask |
2748       SpvImageOperandsLodMask |
2749       SpvImageOperandsGradMask |
2750       SpvImageOperandsConstOffsetMask |
2751       SpvImageOperandsOffsetMask |
2752       SpvImageOperandsConstOffsetsMask |
2753       SpvImageOperandsSampleMask |
2754       SpvImageOperandsMinLodMask |
2755       SpvImageOperandsMakeTexelAvailableMask |
2756       SpvImageOperandsMakeTexelVisibleMask;
2757 
2758    assert(util_bitcount(op) == 1);
2759    assert(w[mask_idx] & op);
2760    assert(op & ops_with_arg);
2761 
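   /* Arguments appear in mask-bit order, so the argument for 'op' lives one
    * word past the mask, after one slot for each lower mask bit that also
    * takes an argument (two slots for Grad, adjusted below).  E.g. with
    * Bias | ConstOffset set, ConstOffset's argument is at mask_idx + 2.
    */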
2762    uint32_t idx = util_bitcount(w[mask_idx] & (op - 1) & ops_with_arg) + 1;
2763 
2764    /* Adjust indices for operands with two arguments. */
2765    static const SpvImageOperandsMask ops_with_two_args =
2766       SpvImageOperandsGradMask;
2767    idx += util_bitcount(w[mask_idx] & (op - 1) & ops_with_two_args);
2768 
2769    idx += mask_idx;
2770 
2771    vtn_fail_if(idx + (op & ops_with_two_args ? 1 : 0) >= count,
2772                "Image op claims to have %s but does not have enough "
2773                "following operands", spirv_imageoperands_to_string(op));
2774 
2775    return idx;
2776 }
2777 
2778 static void
2779 non_uniform_decoration_cb(struct vtn_builder *b,
2780                           struct vtn_value *val, int member,
2781                           const struct vtn_decoration *dec, void *void_ctx)
2782 {
2783    enum gl_access_qualifier *access = void_ctx;
2784    switch (dec->decoration) {
2785    case SpvDecorationNonUniformEXT:
2786       *access |= ACCESS_NON_UNIFORM;
2787       break;
2788 
2789    default:
2790       break;
2791    }
2792 }
2793 
2794 /* Apply SignExtend/ZeroExtend operands to get the actual result type for
2795  * image read/sample operations and source type for write operations.
2796  */
2797 static nir_alu_type
2798 get_image_type(struct vtn_builder *b, nir_alu_type type, unsigned operands)
2799 {
2800    unsigned extend_operands =
2801       operands & (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask);
2802    vtn_fail_if(nir_alu_type_get_base_type(type) == nir_type_float && extend_operands,
2803                "SignExtend/ZeroExtend used on floating-point texel type");
2804    vtn_fail_if(extend_operands ==
2805                (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask),
2806                "SignExtend and ZeroExtend both specified");
2807 
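   /* E.g. a uint16 texel type with SignExtend becomes nir_type_int16: only
    * the base type changes, the bit size is preserved.
    */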
2808    if (operands & SpvImageOperandsSignExtendMask)
2809       return nir_type_int | nir_alu_type_get_type_size(type);
2810    if (operands & SpvImageOperandsZeroExtendMask)
2811       return nir_type_uint | nir_alu_type_get_type_size(type);
2812 
2813    return type;
2814 }
2815 
2816 static void
2817 vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
2818                    const uint32_t *w, unsigned count)
2819 {
2820    if (opcode == SpvOpSampledImage) {
2821       struct vtn_sampled_image si = {
2822          .image = vtn_get_image(b, w[3], NULL),
2823          .sampler = vtn_get_sampler(b, w[4]),
2824       };
2825 
2826       validate_image_type_for_sampled_image(
2827          b, si.image->type,
2828          "Type of Image operand of OpSampledImage");
2829 
2830       enum gl_access_qualifier access = 0;
2831       vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
2832                              non_uniform_decoration_cb, &access);
2833       vtn_foreach_decoration(b, vtn_untyped_value(b, w[4]),
2834                              non_uniform_decoration_cb, &access);
2835 
2836       vtn_push_sampled_image(b, w[2], si, access & ACCESS_NON_UNIFORM);
2837       return;
2838    } else if (opcode == SpvOpImage) {
2839       struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
2840 
2841       enum gl_access_qualifier access = 0;
2842       vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
2843                              non_uniform_decoration_cb, &access);
2844 
2845       vtn_push_image(b, w[2], si.image, access & ACCESS_NON_UNIFORM);
2846       return;
2847    } else if (opcode == SpvOpImageSparseTexelsResident) {
2848       nir_def *code = vtn_get_nir_ssa(b, w[3]);
2849       vtn_push_nir_ssa(b, w[2], nir_is_sparse_texels_resident(&b->nb, 1, code));
2850       return;
2851    }
2852 
2853    nir_deref_instr *image = NULL, *sampler = NULL;
2854    struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
2855    if (sampled_val->type->base_type == vtn_base_type_sampled_image) {
2856       struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
2857       image = si.image;
2858       sampler = si.sampler;
2859    } else {
2860       image = vtn_get_image(b, w[3], NULL);
2861    }
2862 
2863    const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image->type);
2864    const bool is_array = glsl_sampler_type_is_array(image->type);
2865    nir_alu_type dest_type = nir_type_invalid;
2866 
2867    /* Figure out the base texture operation */
2868    nir_texop texop;
2869    switch (opcode) {
2870    case SpvOpImageSampleImplicitLod:
2871    case SpvOpImageSparseSampleImplicitLod:
2872    case SpvOpImageSampleDrefImplicitLod:
2873    case SpvOpImageSparseSampleDrefImplicitLod:
2874       vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_BUF &&
2875                  sampler_dim != GLSL_SAMPLER_DIM_MS &&
2876                  sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS);
2877       texop = nir_texop_tex;
2878       break;
2879 
2880    case SpvOpImageSampleProjImplicitLod:
2881    case SpvOpImageSampleProjDrefImplicitLod:
2882       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2883                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
2884                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
2885                  sampler_dim == GLSL_SAMPLER_DIM_RECT);
2886       vtn_assert(!is_array);
2887       texop = nir_texop_tex;
2888       break;
2889 
2890    case SpvOpImageSampleExplicitLod:
2891    case SpvOpImageSparseSampleExplicitLod:
2892    case SpvOpImageSampleDrefExplicitLod:
2893    case SpvOpImageSparseSampleDrefExplicitLod:
2894       vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_BUF &&
2895                  sampler_dim != GLSL_SAMPLER_DIM_MS &&
2896                  sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS);
2897       texop = nir_texop_txl;
2898       break;
2899 
2900    case SpvOpImageSampleProjExplicitLod:
2901    case SpvOpImageSampleProjDrefExplicitLod:
2902       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2903                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
2904                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
2905                  sampler_dim == GLSL_SAMPLER_DIM_RECT);
2906       vtn_assert(!is_array);
2907       texop = nir_texop_txl;
2908       break;
2909 
2910    case SpvOpImageFetch:
2911    case SpvOpImageSparseFetch:
2912       vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_CUBE);
2913       if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
2914          texop = nir_texop_txf_ms;
2915       } else {
2916          texop = nir_texop_txf;
2917       }
2918       break;
2919 
2920    case SpvOpImageGather:
2921    case SpvOpImageSparseGather:
2922    case SpvOpImageDrefGather:
2923    case SpvOpImageSparseDrefGather:
2924       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_2D ||
2925                  sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
2926                  sampler_dim == GLSL_SAMPLER_DIM_RECT);
2927       texop = nir_texop_tg4;
2928       break;
2929 
2930    case SpvOpImageQuerySizeLod:
2931       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2932                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
2933                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
2934                  sampler_dim == GLSL_SAMPLER_DIM_CUBE);
2935       texop = nir_texop_txs;
2936       dest_type = nir_type_int32;
2937       break;
2938 
2939    case SpvOpImageQuerySize:
2940       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2941                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
2942                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
2943                  sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
2944                  sampler_dim == GLSL_SAMPLER_DIM_RECT ||
2945                  sampler_dim == GLSL_SAMPLER_DIM_MS ||
2946                  sampler_dim == GLSL_SAMPLER_DIM_BUF);
2947       texop = nir_texop_txs;
2948       dest_type = nir_type_int32;
2949       break;
2950 
2951    case SpvOpImageQueryLod:
2952       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2953                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
2954                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
2955                  sampler_dim == GLSL_SAMPLER_DIM_CUBE);
2956       texop = nir_texop_lod;
2957       dest_type = nir_type_float32;
2958       break;
2959 
2960    case SpvOpImageQueryLevels:
2961       /* This operation is not valid for an MS image but is present in
2962        * some old shaders.  Just return 1 in those cases.
2963        */
2964       if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
2965          vtn_warn("OpImageQueryLevels 'Sampled Image' should have an MS of 0, "
2966                   "but found MS of 1.  Replacing query with constant value 1.");
2967          vtn_push_nir_ssa(b, w[2], nir_imm_int(&b->nb, 1));
2968          return;
2969       }
2970       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
2971                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
2972                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
2973                  sampler_dim == GLSL_SAMPLER_DIM_CUBE);
2974       texop = nir_texop_query_levels;
2975       dest_type = nir_type_int32;
2976       break;
2977 
2978    case SpvOpImageQuerySamples:
2979       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS);
2980       texop = nir_texop_texture_samples;
2981       dest_type = nir_type_int32;
2982       break;
2983 
2984    case SpvOpFragmentFetchAMD:
2985       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS ||
2986                  sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2987       texop = nir_texop_fragment_fetch_amd;
2988       break;
2989 
2990    case SpvOpFragmentMaskFetchAMD:
2991       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS ||
2992                  sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2993       texop = nir_texop_fragment_mask_fetch_amd;
2994       dest_type = nir_type_uint32;
2995       break;
2996 
2997    default:
2998       vtn_fail_with_opcode("Unhandled opcode", opcode);
2999    }
3000 
3001    nir_tex_src srcs[10]; /* 10 should be enough */
3002    nir_tex_src *p = srcs;
3003 
3004    p->src = nir_src_for_ssa(&image->def);
3005    p->src_type = nir_tex_src_texture_deref;
3006    p++;
3007 
3008    switch (texop) {
3009    case nir_texop_tex:
3010    case nir_texop_txb:
3011    case nir_texop_txl:
3012    case nir_texop_txd:
3013    case nir_texop_tg4:
3014    case nir_texop_lod:
3015       vtn_fail_if(sampler == NULL,
3016                   "%s requires an image of type OpTypeSampledImage",
3017                   spirv_op_to_string(opcode));
3018       p->src = nir_src_for_ssa(&sampler->def);
3019       p->src_type = nir_tex_src_sampler_deref;
3020       p++;
3021       break;
3022    case nir_texop_txf:
3023    case nir_texop_txf_ms:
3024    case nir_texop_txs:
3025    case nir_texop_query_levels:
3026    case nir_texop_texture_samples:
3027    case nir_texop_samples_identical:
3028    case nir_texop_fragment_fetch_amd:
3029    case nir_texop_fragment_mask_fetch_amd:
3030       /* These don't take a sampler */
3031       break;
3032    case nir_texop_txf_ms_fb:
3033       vtn_fail("unexpected nir_texop_txf_ms_fb");
3034       break;
3035    case nir_texop_txf_ms_mcs_intel:
3036       vtn_fail("unexpected nir_texop_txf_ms_mcs");
3037       break;
3038    case nir_texop_tex_prefetch:
3039       vtn_fail("unexpected nir_texop_tex_prefetch");
3040       break;
3041    case nir_texop_descriptor_amd:
3042    case nir_texop_sampler_descriptor_amd:
3043       vtn_fail("unexpected nir_texop_*descriptor_amd");
3044       break;
3045    case nir_texop_lod_bias_agx:
3046       vtn_fail("unexpected nir_texop_lod_bias_agx");
3047       break;
3048    case nir_texop_hdr_dim_nv:
3049    case nir_texop_tex_type_nv:
3050       vtn_fail("unexpected nir_texop_*_nv");
3051       break;
3052    }
3053 
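   /* w[1] is the result type, w[2] the result id, and w[3] the image or
    * sampled image, so instruction-specific operands start at w[4].
    */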
3054    unsigned idx = 4;
3055 
3056    struct nir_def *coord;
3057    unsigned coord_components;
3058    switch (opcode) {
3059    case SpvOpImageSampleImplicitLod:
3060    case SpvOpImageSparseSampleImplicitLod:
3061    case SpvOpImageSampleExplicitLod:
3062    case SpvOpImageSparseSampleExplicitLod:
3063    case SpvOpImageSampleDrefImplicitLod:
3064    case SpvOpImageSparseSampleDrefImplicitLod:
3065    case SpvOpImageSampleDrefExplicitLod:
3066    case SpvOpImageSparseSampleDrefExplicitLod:
3067    case SpvOpImageSampleProjImplicitLod:
3068    case SpvOpImageSampleProjExplicitLod:
3069    case SpvOpImageSampleProjDrefImplicitLod:
3070    case SpvOpImageSampleProjDrefExplicitLod:
3071    case SpvOpImageFetch:
3072    case SpvOpImageSparseFetch:
3073    case SpvOpImageGather:
3074    case SpvOpImageSparseGather:
3075    case SpvOpImageDrefGather:
3076    case SpvOpImageSparseDrefGather:
3077    case SpvOpImageQueryLod:
3078    case SpvOpFragmentFetchAMD:
3079    case SpvOpFragmentMaskFetchAMD: {
3080       /* All these opcodes take the coordinate as their first real argument */
3081       coord_components = glsl_get_sampler_dim_coordinate_components(sampler_dim);
3082 
3083       if (is_array && texop != nir_texop_lod)
3084          coord_components++;
3085 
3086       struct vtn_ssa_value *coord_val = vtn_ssa_value(b, w[idx++]);
3087       coord = coord_val->def;
3088       /* From the SPIR-V spec, version 1.5, rev. 5:
3089        *
3090        *    "Coordinate must be a scalar or vector of floating-point type. It
3091        *    contains (u[, v] ... [, array layer]) as needed by the definition
3092        *    of Sampled Image. It may be a vector larger than needed, but all
3093        *    unused components appear after all used components."
3094        */
3095       vtn_fail_if(coord->num_components < coord_components,
3096                   "Coordinate value passed has fewer components than sampler dimensionality.");
3097       p->src = nir_src_for_ssa(nir_trim_vector(&b->nb, coord, coord_components));
3098 
3099       /* OpenCL allows integer sampling coordinates */
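      /* Integer coordinates are converted to float with 0.5 added so the
       * sample lands on the texel center; the array index, if any, is left
       * un-offset.
       */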
3100       if (glsl_type_is_integer(coord_val->type) &&
3101           opcode == SpvOpImageSampleExplicitLod) {
3102          vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
3103                      "Unless the Kernel capability is being used, the coordinate parameter "
3104                      "of OpImageSampleExplicitLod must be floating point.");
3105 
3106          nir_def *coords[4];
3107          nir_def *f0_5 = nir_imm_float(&b->nb, 0.5);
3108          for (unsigned i = 0; i < coord_components; i++) {
3109             coords[i] = nir_i2f32(&b->nb, nir_channel(&b->nb, p->src.ssa, i));
3110 
3111             if (!is_array || i != coord_components - 1)
3112                coords[i] = nir_fadd(&b->nb, coords[i], f0_5);
3113          }
3114 
3115          p->src = nir_src_for_ssa(nir_vec(&b->nb, coords, coord_components));
3116       }
3117 
3118       p->src_type = nir_tex_src_coord;
3119       p++;
3120       break;
3121    }
3122 
3123    default:
3124       coord = NULL;
3125       coord_components = 0;
3126       break;
3127    }
3128 
3129    switch (opcode) {
3130    case SpvOpImageSampleProjImplicitLod:
3131    case SpvOpImageSampleProjExplicitLod:
3132    case SpvOpImageSampleProjDrefImplicitLod:
3133    case SpvOpImageSampleProjDrefExplicitLod:
3134       /* These have the projector as the last coordinate component */
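      /* E.g. a 2D Proj coordinate is (s, t, q): s and t were taken above
       * and q becomes the projector source here.
       */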
3135       p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
3136       p->src_type = nir_tex_src_projector;
3137       p++;
3138       break;
3139 
3140    default:
3141       break;
3142    }
3143 
3144    bool is_shadow = false;
3145    unsigned gather_component = 0;
3146    switch (opcode) {
3147    case SpvOpImageSampleDrefImplicitLod:
3148    case SpvOpImageSparseSampleDrefImplicitLod:
3149    case SpvOpImageSampleDrefExplicitLod:
3150    case SpvOpImageSparseSampleDrefExplicitLod:
3151    case SpvOpImageSampleProjDrefImplicitLod:
3152    case SpvOpImageSampleProjDrefExplicitLod:
3153    case SpvOpImageDrefGather:
3154    case SpvOpImageSparseDrefGather:
3155       /* These all have an explicit depth value as their next source */
3156       is_shadow = true;
3157       (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
3158       break;
3159 
3160    case SpvOpImageGather:
3161    case SpvOpImageSparseGather:
3162       /* This has a component as its next source */
3163       gather_component = vtn_constant_uint(b, w[idx++]);
3164       break;
3165 
3166    default:
3167       break;
3168    }
3169 
3170    bool is_sparse = false;
3171    switch (opcode) {
3172    case SpvOpImageSparseSampleImplicitLod:
3173    case SpvOpImageSparseSampleExplicitLod:
3174    case SpvOpImageSparseSampleDrefImplicitLod:
3175    case SpvOpImageSparseSampleDrefExplicitLod:
3176    case SpvOpImageSparseFetch:
3177    case SpvOpImageSparseGather:
3178    case SpvOpImageSparseDrefGather:
3179       is_sparse = true;
3180       break;
3181    default:
3182       break;
3183    }
3184 
3185    /* For OpImageQuerySizeLod, we always have an LOD */
3186    if (opcode == SpvOpImageQuerySizeLod)
3187       (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
3188 
3189    /* For OpFragmentFetchAMD, we always have a multisample index */
3190    if (opcode == SpvOpFragmentFetchAMD)
3191       (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
3192 
3193    /* Now we need to handle some number of optional arguments */
3194    struct vtn_value *gather_offsets = NULL;
3195    uint32_t operands = SpvImageOperandsMaskNone;
3196    if (idx < count) {
3197       operands = w[idx];
3198 
3199       if (operands & SpvImageOperandsBiasMask) {
3200          vtn_assert(texop == nir_texop_tex ||
3201                     texop == nir_texop_tg4);
3202          if (texop == nir_texop_tex)
3203             texop = nir_texop_txb;
3204          uint32_t arg = image_operand_arg(b, w, count, idx,
3205                                           SpvImageOperandsBiasMask);
3206          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_bias);
3207       }
3208 
3209       if (operands & SpvImageOperandsLodMask) {
3210          vtn_assert(texop == nir_texop_txl || texop == nir_texop_txf ||
3211                     texop == nir_texop_txs || texop == nir_texop_tg4);
3212          uint32_t arg = image_operand_arg(b, w, count, idx,
3213                                           SpvImageOperandsLodMask);
3214          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_lod);
3215       }
3216 
3217       if (operands & SpvImageOperandsGradMask) {
3218          vtn_assert(texop == nir_texop_txl);
3219          texop = nir_texop_txd;
3220          uint32_t arg = image_operand_arg(b, w, count, idx,
3221                                           SpvImageOperandsGradMask);
3222          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ddx);
3223          (*p++) = vtn_tex_src(b, w[arg + 1], nir_tex_src_ddy);
3224       }
3225 
3226       vtn_fail_if(util_bitcount(operands & (SpvImageOperandsConstOffsetsMask |
3227                                             SpvImageOperandsOffsetMask |
3228                                             SpvImageOperandsConstOffsetMask)) > 1,
3229                   "At most one of the ConstOffset, Offset, and ConstOffsets "
3230                   "image operands can be used on a given instruction.");
3231 
3232       if (operands & SpvImageOperandsOffsetMask) {
3233          uint32_t arg = image_operand_arg(b, w, count, idx,
3234                                           SpvImageOperandsOffsetMask);
3235          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3236       }
3237 
3238       if (operands & SpvImageOperandsConstOffsetMask) {
3239          uint32_t arg = image_operand_arg(b, w, count, idx,
3240                                           SpvImageOperandsConstOffsetMask);
3241          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3242       }
3243 
3244       if (operands & SpvImageOperandsConstOffsetsMask) {
3245          vtn_assert(texop == nir_texop_tg4);
3246          uint32_t arg = image_operand_arg(b, w, count, idx,
3247                                           SpvImageOperandsConstOffsetsMask);
3248          gather_offsets = vtn_value(b, w[arg], vtn_value_type_constant);
3249       }
3250 
3251       if (operands & SpvImageOperandsSampleMask) {
3252          vtn_assert(texop == nir_texop_txf_ms);
3253          uint32_t arg = image_operand_arg(b, w, count, idx,
3254                                           SpvImageOperandsSampleMask);
3255          texop = nir_texop_txf_ms;
3256          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ms_index);
3257       }
3258 
3259       if (operands & SpvImageOperandsMinLodMask) {
3260          vtn_assert(texop == nir_texop_tex ||
3261                     texop == nir_texop_txb ||
3262                     texop == nir_texop_txd);
3263          uint32_t arg = image_operand_arg(b, w, count, idx,
3264                                           SpvImageOperandsMinLodMask);
3265          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_min_lod);
3266       }
3267    }
3268 
3269    struct vtn_type *ret_type = vtn_get_type(b, w[1]);
3270    struct vtn_type *struct_type = NULL;
3271    if (is_sparse) {
3272       vtn_assert(glsl_type_is_struct_or_ifc(ret_type->type));
3273       struct_type = ret_type;
3274       ret_type = struct_type->members[1];
3275    }
3276 
3277    nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
3278    instr->op = texop;
3279 
3280    memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
3281 
3282    instr->coord_components = coord_components;
3283    instr->sampler_dim = sampler_dim;
3284    instr->is_array = is_array;
3285    instr->is_shadow = is_shadow;
3286    instr->is_sparse = is_sparse;
3287    instr->is_new_style_shadow =
3288       is_shadow && glsl_get_components(ret_type->type) == 1;
3289    instr->component = gather_component;
3290 
3291    /* If SpvCapabilityImageGatherBiasLodAMD is enabled, texture gather without an explicit LOD
3292     * has an implicit one (instead of using level 0).
3293     */
3294    if (texop == nir_texop_tg4 && b->image_gather_bias_lod &&
3295        !(operands & SpvImageOperandsLodMask)) {
3296       instr->is_gather_implicit_lod = true;
3297    }
3298 
3299    /* The Vulkan spec says:
3300     *
3301     *    "If an instruction loads from or stores to a resource (including
3302     *    atomics and image instructions) and the resource descriptor being
3303     *    accessed is not dynamically uniform, then the operand corresponding
3304     *    to that resource (e.g. the pointer or sampled image operand) must be
3305     *    decorated with NonUniform."
3306     *
3307     * It's very careful to specify that the exact operand must be decorated
3308     * NonUniform.  The SPIR-V parser is not expected to chase through long
3309     * chains to find the NonUniform decoration.  It's either right there or we
3310     * can assume it doesn't exist.
3311     */
3312    enum gl_access_qualifier access = 0;
3313    vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access);
3314 
3315    if (operands & SpvImageOperandsNontemporalMask)
3316       access |= ACCESS_NON_TEMPORAL;
3317 
3318    if (sampler && b->options->force_tex_non_uniform)
3319       access |= ACCESS_NON_UNIFORM;
3320 
3321    if (sampled_val->propagated_non_uniform)
3322       access |= ACCESS_NON_UNIFORM;
3323 
3324    if (image && (access & ACCESS_NON_UNIFORM))
3325       instr->texture_non_uniform = true;
3326 
3327    if (sampler && (access & ACCESS_NON_UNIFORM))
3328       instr->sampler_non_uniform = true;
3329 
3330    /* for non-query ops, get dest_type from SPIR-V return type */
3331    if (dest_type == nir_type_invalid) {
3332       /* The return type should match the image type, unless the image type is
3333        * VOID (a CL untyped image), in which case the return type determines
3334        * the sampler result type. */
3335       enum glsl_base_type sampler_base =
3336          glsl_get_sampler_result_type(image->type);
3337       enum glsl_base_type ret_base = glsl_get_base_type(ret_type->type);
3338       vtn_fail_if(sampler_base != ret_base && sampler_base != GLSL_TYPE_VOID,
3339                   "SPIR-V return type does not match image type. This is only valid "
3340                   "for untyped images (OpenCL).");
3341       dest_type = nir_get_nir_type_for_glsl_base_type(ret_base);
3342       dest_type = get_image_type(b, dest_type, operands);
3343    }
3344 
3345    instr->dest_type = dest_type;
3346 
3347    nir_def_init(&instr->instr, &instr->def,
3348                 nir_tex_instr_dest_size(instr), 32);
3349 
3350    vtn_assert(glsl_get_vector_elements(ret_type->type) ==
3351               nir_tex_instr_result_size(instr));
3352 
3353    if (gather_offsets) {
3354       vtn_fail_if(gather_offsets->type->base_type != vtn_base_type_array ||
3355                   gather_offsets->type->length != 4,
3356                   "ConstOffsets must be an array of size four of vectors "
3357                   "of two integer components");
3358 
3359       struct vtn_type *vec_type = gather_offsets->type->array_element;
3360       vtn_fail_if(vec_type->base_type != vtn_base_type_vector ||
3361                   vec_type->length != 2 ||
3362                   !glsl_type_is_integer(vec_type->type),
3363                   "ConstOffsets must be an array of size four of vectors "
3364                   "of two integer components");
3365 
3366       unsigned bit_size = glsl_get_bit_size(vec_type->type);
3367       for (uint32_t i = 0; i < 4; i++) {
3368          const nir_const_value *cvec =
3369             gather_offsets->constant->elements[i]->values;
3370          for (uint32_t j = 0; j < 2; j++) {
3371             switch (bit_size) {
3372             case 8:  instr->tg4_offsets[i][j] = cvec[j].i8;    break;
3373             case 16: instr->tg4_offsets[i][j] = cvec[j].i16;   break;
3374             case 32: instr->tg4_offsets[i][j] = cvec[j].i32;   break;
3375             case 64: instr->tg4_offsets[i][j] = cvec[j].i64;   break;
3376             default:
3377                vtn_fail("Unsupported bit size: %u", bit_size);
3378             }
3379          }
3380       }
3381    }
3382 
3383    nir_builder_instr_insert(&b->nb, &instr->instr);
3384 
3385    if (is_sparse) {
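      /* NIR packs the texel components first with the residency code in the
       * extra trailing channel; SPIR-V sparse results are a struct of
       * (residency code, texel), so split accordingly.
       */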
3386       struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3387       unsigned result_size = glsl_get_vector_elements(ret_type->type);
3388       dest->elems[0]->def = nir_channel(&b->nb, &instr->def, result_size);
3389       dest->elems[1]->def = nir_trim_vector(&b->nb, &instr->def,
3390                                               result_size);
3391       vtn_push_ssa_value(b, w[2], dest);
3392    } else {
3393       vtn_push_nir_ssa(b, w[2], &instr->def);
3394    }
3395 }
3396 
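/* SpvOpAtomicISub and SpvOpAtomicIDecrement have no direct NIR atomic op;
 * they map to iadd and fill_common_atomic_sources() negates the operand
 * (or supplies -1) to compensate.
 */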
3397 static nir_atomic_op
3398 translate_atomic_op(SpvOp opcode)
3399 {
3400    switch (opcode) {
3401    case SpvOpAtomicExchange:            return nir_atomic_op_xchg;
3402    case SpvOpAtomicCompareExchange:     return nir_atomic_op_cmpxchg;
3403    case SpvOpAtomicCompareExchangeWeak: return nir_atomic_op_cmpxchg;
3404    case SpvOpAtomicIIncrement:          return nir_atomic_op_iadd;
3405    case SpvOpAtomicIDecrement:          return nir_atomic_op_iadd;
3406    case SpvOpAtomicIAdd:                return nir_atomic_op_iadd;
3407    case SpvOpAtomicISub:                return nir_atomic_op_iadd;
3408    case SpvOpAtomicSMin:                return nir_atomic_op_imin;
3409    case SpvOpAtomicUMin:                return nir_atomic_op_umin;
3410    case SpvOpAtomicSMax:                return nir_atomic_op_imax;
3411    case SpvOpAtomicUMax:                return nir_atomic_op_umax;
3412    case SpvOpAtomicAnd:                 return nir_atomic_op_iand;
3413    case SpvOpAtomicOr:                  return nir_atomic_op_ior;
3414    case SpvOpAtomicXor:                 return nir_atomic_op_ixor;
3415    case SpvOpAtomicFAddEXT:             return nir_atomic_op_fadd;
3416    case SpvOpAtomicFMinEXT:             return nir_atomic_op_fmin;
3417    case SpvOpAtomicFMaxEXT:             return nir_atomic_op_fmax;
3418    case SpvOpAtomicFlagTestAndSet:      return nir_atomic_op_cmpxchg;
3419    default:
3420       unreachable("Invalid atomic");
3421    }
3422 }
3423 
3424 static void
3425 fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
3426                            const uint32_t *w, nir_src *src)
3427 {
3428    const struct glsl_type *type = vtn_get_type(b, w[1])->type;
3429    unsigned bit_size = glsl_get_bit_size(type);
3430 
3431    switch (opcode) {
3432    case SpvOpAtomicIIncrement:
3433       src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 1, bit_size));
3434       break;
3435 
3436    case SpvOpAtomicIDecrement:
3437       src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, bit_size));
3438       break;
3439 
3440    case SpvOpAtomicISub:
3441       src[0] =
3442          nir_src_for_ssa(nir_ineg(&b->nb, vtn_get_nir_ssa(b, w[6])));
3443       break;
3444 
3445    case SpvOpAtomicCompareExchange:
3446    case SpvOpAtomicCompareExchangeWeak:
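      /* SPIR-V encodes Value (w[7]) before Comparator (w[8]); NIR's
       * compare-and-swap takes the comparator first, hence the swapped
       * indices below.
       */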
3447       src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[8]));
3448       src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[7]));
3449       break;
3450 
3451    case SpvOpAtomicExchange:
3452    case SpvOpAtomicIAdd:
3453    case SpvOpAtomicSMin:
3454    case SpvOpAtomicUMin:
3455    case SpvOpAtomicSMax:
3456    case SpvOpAtomicUMax:
3457    case SpvOpAtomicAnd:
3458    case SpvOpAtomicOr:
3459    case SpvOpAtomicXor:
3460    case SpvOpAtomicFAddEXT:
3461    case SpvOpAtomicFMinEXT:
3462    case SpvOpAtomicFMaxEXT:
3463       src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6]));
3464       break;
3465 
3466    default:
3467       vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3468    }
3469 }
3470 
3471 static nir_def *
3472 get_image_coord(struct vtn_builder *b, uint32_t value)
3473 {
3474    nir_def *coord = vtn_get_nir_ssa(b, value);
3475    /* The image_load_store intrinsics assume a 4-dim coordinate */
3476    return nir_pad_vec4(&b->nb, coord);
3477 }
3478 
3479 static void
3480 vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
3481                  const uint32_t *w, unsigned count)
3482 {
3483    /* Just get this one out of the way */
3484    if (opcode == SpvOpImageTexelPointer) {
3485       struct vtn_value *val =
3486          vtn_push_value(b, w[2], vtn_value_type_image_pointer);
3487       val->image = vtn_alloc(b, struct vtn_image_pointer);
3488 
3489       val->image->image = vtn_nir_deref(b, w[3]);
3490       val->image->coord = get_image_coord(b, w[4]);
3491       val->image->sample = vtn_get_nir_ssa(b, w[5]);
3492       val->image->lod = nir_imm_int(&b->nb, 0);
3493       return;
3494    }
3495 
3496    struct vtn_image_pointer image;
3497    SpvScope scope = SpvScopeInvocation;
3498    SpvMemorySemanticsMask semantics = 0;
3499    SpvImageOperandsMask operands = SpvImageOperandsMaskNone;
3500 
3501    enum gl_access_qualifier access = 0;
3502 
3503    struct vtn_value *res_val;
3504    switch (opcode) {
3505    case SpvOpAtomicExchange:
3506    case SpvOpAtomicCompareExchange:
3507    case SpvOpAtomicCompareExchangeWeak:
3508    case SpvOpAtomicIIncrement:
3509    case SpvOpAtomicIDecrement:
3510    case SpvOpAtomicIAdd:
3511    case SpvOpAtomicISub:
3512    case SpvOpAtomicLoad:
3513    case SpvOpAtomicSMin:
3514    case SpvOpAtomicUMin:
3515    case SpvOpAtomicSMax:
3516    case SpvOpAtomicUMax:
3517    case SpvOpAtomicAnd:
3518    case SpvOpAtomicOr:
3519    case SpvOpAtomicXor:
3520    case SpvOpAtomicFAddEXT:
3521    case SpvOpAtomicFMinEXT:
3522    case SpvOpAtomicFMaxEXT:
3523       res_val = vtn_value(b, w[3], vtn_value_type_image_pointer);
3524       image = *res_val->image;
3525       scope = vtn_constant_uint(b, w[4]);
3526       semantics = vtn_constant_uint(b, w[5]);
3527       access |= ACCESS_COHERENT;
3528       break;
3529 
3530    case SpvOpAtomicStore:
3531       res_val = vtn_value(b, w[1], vtn_value_type_image_pointer);
3532       image = *res_val->image;
3533       scope = vtn_constant_uint(b, w[2]);
3534       semantics = vtn_constant_uint(b, w[3]);
3535       access |= ACCESS_COHERENT;
3536       break;
3537 
3538    case SpvOpImageQuerySizeLod:
3539       res_val = vtn_untyped_value(b, w[3]);
3540       image.image = vtn_get_image(b, w[3], &access);
3541       image.coord = NULL;
3542       image.sample = NULL;
3543       image.lod = vtn_ssa_value(b, w[4])->def;
3544       break;
3545 
3546    case SpvOpImageQuerySize:
3547    case SpvOpImageQuerySamples:
3548       res_val = vtn_untyped_value(b, w[3]);
3549       image.image = vtn_get_image(b, w[3], &access);
3550       image.coord = NULL;
3551       image.sample = NULL;
3552       image.lod = NULL;
3553       break;
3554 
3555    case SpvOpImageQueryFormat:
3556    case SpvOpImageQueryOrder:
3557       res_val = vtn_untyped_value(b, w[3]);
3558       image.image = vtn_get_image(b, w[3], &access);
3559       image.coord = NULL;
3560       image.sample = NULL;
3561       image.lod = NULL;
3562       break;
3563 
3564    case SpvOpImageRead:
3565    case SpvOpImageSparseRead: {
3566       res_val = vtn_untyped_value(b, w[3]);
3567       image.image = vtn_get_image(b, w[3], &access);
3568       image.coord = get_image_coord(b, w[4]);
3569 
3570       operands = count > 5 ? w[5] : SpvImageOperandsMaskNone;
3571 
3572       if (operands & SpvImageOperandsSampleMask) {
3573          uint32_t arg = image_operand_arg(b, w, count, 5,
3574                                           SpvImageOperandsSampleMask);
3575          image.sample = vtn_get_nir_ssa(b, w[arg]);
3576       } else {
3577          image.sample = nir_undef(&b->nb, 1, 32);
3578       }
3579 
3580       if (operands & SpvImageOperandsMakeTexelVisibleMask) {
3581          vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3582                      "MakeTexelVisible requires NonPrivateTexel to also be set.");
3583          uint32_t arg = image_operand_arg(b, w, count, 5,
3584                                           SpvImageOperandsMakeTexelVisibleMask);
3585          semantics = SpvMemorySemanticsMakeVisibleMask;
3586          scope = vtn_constant_uint(b, w[arg]);
3587       }
3588 
3589       if (operands & SpvImageOperandsLodMask) {
3590          uint32_t arg = image_operand_arg(b, w, count, 5,
3591                                           SpvImageOperandsLodMask);
3592          image.lod = vtn_get_nir_ssa(b, w[arg]);
3593       } else {
3594          image.lod = nir_imm_int(&b->nb, 0);
3595       }
3596 
3597       if (operands & SpvImageOperandsVolatileTexelMask)
3598          access |= ACCESS_VOLATILE;
3599       if (operands & SpvImageOperandsNontemporalMask)
3600          access |= ACCESS_NON_TEMPORAL;
3601 
3602       break;
3603    }
3604 
3605    case SpvOpImageWrite: {
3606       res_val = vtn_untyped_value(b, w[1]);
3607       image.image = vtn_get_image(b, w[1], &access);
3608       image.coord = get_image_coord(b, w[2]);
3609 
3610       /* texel = w[3] */
3611 
3612       operands = count > 4 ? w[4] : SpvImageOperandsMaskNone;
3613 
3614       if (operands & SpvImageOperandsSampleMask) {
3615          uint32_t arg = image_operand_arg(b, w, count, 4,
3616                                           SpvImageOperandsSampleMask);
3617          image.sample = vtn_get_nir_ssa(b, w[arg]);
3618       } else {
3619          image.sample = nir_undef(&b->nb, 1, 32);
3620       }
3621 
3622       if (operands & SpvImageOperandsMakeTexelAvailableMask) {
3623          vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3624                      "MakeTexelAvailable requires NonPrivateTexel to also be set.");
3625          uint32_t arg = image_operand_arg(b, w, count, 4,
3626                                           SpvImageOperandsMakeTexelAvailableMask);
3627          semantics = SpvMemorySemanticsMakeAvailableMask;
3628          scope = vtn_constant_uint(b, w[arg]);
3629       }
3630 
3631       if (operands & SpvImageOperandsLodMask) {
3632          uint32_t arg = image_operand_arg(b, w, count, 4,
3633                                           SpvImageOperandsLodMask);
3634          image.lod = vtn_get_nir_ssa(b, w[arg]);
3635       } else {
3636          image.lod = nir_imm_int(&b->nb, 0);
3637       }
3638 
3639       if (operands & SpvImageOperandsVolatileTexelMask)
3640          access |= ACCESS_VOLATILE;
3641       if (operands & SpvImageOperandsNontemporalMask)
3642          access |= ACCESS_NON_TEMPORAL;
3643 
3644       break;
3645    }
3646 
3647    default:
3648       vtn_fail_with_opcode("Invalid image opcode", opcode);
3649    }
3650 
3651    if (semantics & SpvMemorySemanticsVolatileMask)
3652       access |= ACCESS_VOLATILE;
3653 
3654    nir_intrinsic_op op;
3655    switch (opcode) {
3656 #define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_deref_##N; break;
3657    OP(ImageQuerySize,            size)
3658    OP(ImageQuerySizeLod,         size)
3659    OP(ImageRead,                 load)
3660    OP(ImageSparseRead,           sparse_load)
3661    OP(ImageWrite,                store)
3662    OP(AtomicLoad,                load)
3663    OP(AtomicStore,               store)
3664    OP(AtomicExchange,            atomic)
3665    OP(AtomicCompareExchange,     atomic_swap)
3666    OP(AtomicCompareExchangeWeak, atomic_swap)
3667    OP(AtomicIIncrement,          atomic)
3668    OP(AtomicIDecrement,          atomic)
3669    OP(AtomicIAdd,                atomic)
3670    OP(AtomicISub,                atomic)
3671    OP(AtomicSMin,                atomic)
3672    OP(AtomicUMin,                atomic)
3673    OP(AtomicSMax,                atomic)
3674    OP(AtomicUMax,                atomic)
3675    OP(AtomicAnd,                 atomic)
3676    OP(AtomicOr,                  atomic)
3677    OP(AtomicXor,                 atomic)
3678    OP(AtomicFAddEXT,             atomic)
3679    OP(AtomicFMinEXT,             atomic)
3680    OP(AtomicFMaxEXT,             atomic)
3681    OP(ImageQueryFormat,          format)
3682    OP(ImageQueryOrder,           order)
3683    OP(ImageQuerySamples,         samples)
3684 #undef OP
3685    default:
3686       vtn_fail_with_opcode("Invalid image opcode", opcode);
3687    }
3688 
3689    nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
3690    if (nir_intrinsic_has_atomic_op(intrin))
3691       nir_intrinsic_set_atomic_op(intrin, translate_atomic_op(opcode));
3692 
3693    intrin->src[0] = nir_src_for_ssa(&image.image->def);
3694    nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(image.image->type));
3695    nir_intrinsic_set_image_array(intrin,
3696       glsl_sampler_type_is_array(image.image->type));
3697 
3698    switch (opcode) {
3699    case SpvOpImageQuerySamples:
3700    case SpvOpImageQuerySize:
3701    case SpvOpImageQuerySizeLod:
3702    case SpvOpImageQueryFormat:
3703    case SpvOpImageQueryOrder:
3704       break;
3705    default:
3706       /* The image coordinate is always 4 components but we may not have that
3707        * many.  Pad out to vec4 with undef to compensate.
3708        */
3709       intrin->src[1] = nir_src_for_ssa(nir_pad_vec4(&b->nb, image.coord));
3710       intrin->src[2] = nir_src_for_ssa(image.sample);
3711       break;
3712    }
3713 
3714    /* The Vulkan spec says:
3715     *
3716     *    "If an instruction loads from or stores to a resource (including
3717     *    atomics and image instructions) and the resource descriptor being
3718     *    accessed is not dynamically uniform, then the operand corresponding
3719     *    to that resource (e.g. the pointer or sampled image operand) must be
3720     *    decorated with NonUniform."
3721     *
3722     * It's very careful to specify that the exact operand must be decorated
3723     * NonUniform.  The SPIR-V parser is not expected to chase through long
3724     * chains to find the NonUniform decoration.  It's either right there or we
3725     * can assume it doesn't exist.
3726     */
3727    vtn_foreach_decoration(b, res_val, non_uniform_decoration_cb, &access);
3728    nir_intrinsic_set_access(intrin, access);
3729 
3730    switch (opcode) {
3731    case SpvOpImageQuerySamples:
3732    case SpvOpImageQueryFormat:
3733    case SpvOpImageQueryOrder:
3734       /* No additional sources */
3735       break;
3736    case SpvOpImageQuerySize:
3737       intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0));
3738       break;
3739    case SpvOpImageQuerySizeLod:
3740       intrin->src[1] = nir_src_for_ssa(image.lod);
3741       break;
3742    case SpvOpAtomicLoad:
3743    case SpvOpImageRead:
3744    case SpvOpImageSparseRead:
3745       /* Only OpImageRead can support a lod parameter if
3746        * SPV_AMD_shader_image_load_store_lod is used but the current NIR
3747        * intrinsics definition for atomics requires us to set it for
3748        * OpAtomicLoad.
3749        */
3750       intrin->src[3] = nir_src_for_ssa(image.lod);
3751       break;
3752    case SpvOpAtomicStore:
3753    case SpvOpImageWrite: {
3754       const uint32_t value_id = opcode == SpvOpAtomicStore ? w[4] : w[3];
3755       struct vtn_ssa_value *value = vtn_ssa_value(b, value_id);
3756       /* nir_intrinsic_image_deref_store always takes a vec4 value */
3757       assert(op == nir_intrinsic_image_deref_store);
3758       intrin->num_components = 4;
3759       intrin->src[3] = nir_src_for_ssa(nir_pad_vec4(&b->nb, value->def));
3760       /* Only OpImageWrite can support a lod parameter if
3761        * SPV_AMD_shader_image_load_store_lod is used but the current NIR
3762        * intrinsics definition for atomics requires us to set it for
3763        * OpAtomicStore.
3764        */
3765       intrin->src[4] = nir_src_for_ssa(image.lod);
3766 
3767       nir_alu_type src_type =
3768          get_image_type(b, nir_get_nir_type_for_glsl_type(value->type), operands);
3769       nir_intrinsic_set_src_type(intrin, src_type);
3770       break;
3771    }
3772 
3773    case SpvOpAtomicCompareExchange:
3774    case SpvOpAtomicCompareExchangeWeak:
3775    case SpvOpAtomicIIncrement:
3776    case SpvOpAtomicIDecrement:
3777    case SpvOpAtomicExchange:
3778    case SpvOpAtomicIAdd:
3779    case SpvOpAtomicISub:
3780    case SpvOpAtomicSMin:
3781    case SpvOpAtomicUMin:
3782    case SpvOpAtomicSMax:
3783    case SpvOpAtomicUMax:
3784    case SpvOpAtomicAnd:
3785    case SpvOpAtomicOr:
3786    case SpvOpAtomicXor:
3787    case SpvOpAtomicFAddEXT:
3788    case SpvOpAtomicFMinEXT:
3789    case SpvOpAtomicFMaxEXT:
3790       fill_common_atomic_sources(b, opcode, w, &intrin->src[3]);
3791       break;
3792 
3793    default:
3794       vtn_fail_with_opcode("Invalid image opcode", opcode);
3795    }
3796 
3797    /* Image operations implicitly have the Image storage memory semantics. */
3798    semantics |= SpvMemorySemanticsImageMemoryMask;
3799 
3800    SpvMemorySemanticsMask before_semantics;
3801    SpvMemorySemanticsMask after_semantics;
3802    vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
3803 
3804    if (before_semantics)
3805       vtn_emit_memory_barrier(b, scope, before_semantics);
3806 
3807    if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) {
3808       struct vtn_type *type = vtn_get_type(b, w[1]);
3809       struct vtn_type *struct_type = NULL;
3810       if (opcode == SpvOpImageSparseRead) {
3811          vtn_assert(glsl_type_is_struct_or_ifc(type->type));
3812          struct_type = type;
3813          type = struct_type->members[1];
3814       }
3815 
3816       unsigned dest_components = glsl_get_vector_elements(type->type);
3817       if (opcode == SpvOpImageSparseRead)
3818          dest_components++;
3819 
3820       if (nir_intrinsic_infos[op].dest_components == 0)
3821          intrin->num_components = dest_components;
3822 
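      /* Size queries are performed at (at most) 32 bits since the NIR image
       * size intrinsics return 32-bit values; the result is widened back to
       * the SPIR-V result width below.
       */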
3823       unsigned bit_size = glsl_get_bit_size(type->type);
3824       if (opcode == SpvOpImageQuerySize ||
3825           opcode == SpvOpImageQuerySizeLod)
3826          bit_size = MIN2(bit_size, 32);
3827 
3828       nir_def_init(&intrin->instr, &intrin->def,
3829                    nir_intrinsic_dest_components(intrin), bit_size);
3830 
3831       nir_builder_instr_insert(&b->nb, &intrin->instr);
3832 
3833       nir_def *result = nir_trim_vector(&b->nb, &intrin->def,
3834                                               dest_components);
3835 
3836       if (opcode == SpvOpImageQuerySize ||
3837           opcode == SpvOpImageQuerySizeLod)
3838          result = nir_u2uN(&b->nb, result, glsl_get_bit_size(type->type));
3839 
3840       if (opcode == SpvOpImageSparseRead) {
3841          struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3842          unsigned res_type_size = glsl_get_vector_elements(type->type);
3843          dest->elems[0]->def = nir_channel(&b->nb, result, res_type_size);
3844          if (intrin->def.bit_size != 32)
3845             dest->elems[0]->def = nir_u2u32(&b->nb, dest->elems[0]->def);
3846          dest->elems[1]->def = nir_trim_vector(&b->nb, result, res_type_size);
3847          vtn_push_ssa_value(b, w[2], dest);
3848       } else {
3849          vtn_push_nir_ssa(b, w[2], result);
3850       }
3851 
3852       if (opcode == SpvOpImageRead || opcode == SpvOpImageSparseRead ||
3853           opcode == SpvOpAtomicLoad) {
3854          nir_alu_type dest_type =
3855             get_image_type(b, nir_get_nir_type_for_glsl_type(type->type), operands);
3856          nir_intrinsic_set_dest_type(intrin, dest_type);
3857       }
3858    } else {
3859       nir_builder_instr_insert(&b->nb, &intrin->instr);
3860    }
3861 
3862    if (after_semantics)
3863       vtn_emit_memory_barrier(b, scope, after_semantics);
3864 }
3865 
3866 static nir_intrinsic_op
3867 get_uniform_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
3868 {
3869    switch (opcode) {
3870 #define OP(S, N) case SpvOp##S: return nir_intrinsic_atomic_counter_ ##N;
3871    OP(AtomicLoad,                read_deref)
3872    OP(AtomicExchange,            exchange)
3873    OP(AtomicCompareExchange,     comp_swap)
3874    OP(AtomicCompareExchangeWeak, comp_swap)
3875    OP(AtomicIIncrement,          inc_deref)
3876    OP(AtomicIDecrement,          post_dec_deref)
3877    OP(AtomicIAdd,                add_deref)
3878    OP(AtomicISub,                add_deref)
3879    OP(AtomicUMin,                min_deref)
3880    OP(AtomicUMax,                max_deref)
3881    OP(AtomicAnd,                 and_deref)
3882    OP(AtomicOr,                  or_deref)
3883    OP(AtomicXor,                 xor_deref)
3884 #undef OP
3885    default:
3886       /* We left the following out: AtomicStore, AtomicSMin and
3887        * AtomicSMax.  Right now there are no NIR intrinsics for them.  At the
3888        * moment, atomic counter support is only needed for ARB_spirv, so we
3889        * only need to support GLSL atomic counters, which are uints and don't
3890        * allow direct storage.
3891        */
3892       vtn_fail("Invalid uniform atomic");
3893    }
3894 }
3895 
3896 static nir_intrinsic_op
3897 get_deref_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
3898 {
3899    switch (opcode) {
3900    case SpvOpAtomicLoad:         return nir_intrinsic_load_deref;
3901    case SpvOpAtomicFlagClear:
3902    case SpvOpAtomicStore:        return nir_intrinsic_store_deref;
3903 #define OP(S, N) case SpvOp##S: return nir_intrinsic_deref_##N;
3904    OP(AtomicExchange,            atomic)
3905    OP(AtomicCompareExchange,     atomic_swap)
3906    OP(AtomicCompareExchangeWeak, atomic_swap)
3907    OP(AtomicIIncrement,          atomic)
3908    OP(AtomicIDecrement,          atomic)
3909    OP(AtomicIAdd,                atomic)
3910    OP(AtomicISub,                atomic)
3911    OP(AtomicSMin,                atomic)
3912    OP(AtomicUMin,                atomic)
3913    OP(AtomicSMax,                atomic)
3914    OP(AtomicUMax,                atomic)
3915    OP(AtomicAnd,                 atomic)
3916    OP(AtomicOr,                  atomic)
3917    OP(AtomicXor,                 atomic)
3918    OP(AtomicFAddEXT,             atomic)
3919    OP(AtomicFMinEXT,             atomic)
3920    OP(AtomicFMaxEXT,             atomic)
3921    OP(AtomicFlagTestAndSet,      atomic_swap)
3922 #undef OP
3923    default:
3924       vtn_fail_with_opcode("Invalid shared atomic", opcode);
3925    }
3926 }
3927 
3928 /*
3929  * Handles shared atomics, SSBO atomics and atomic counters.
3930  */
3931 static void
3932 vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
3933                    const uint32_t *w, UNUSED unsigned count)
3934 {
3935    struct vtn_pointer *ptr;
3936    nir_intrinsic_instr *atomic;
3937 
3938    SpvScope scope = SpvScopeInvocation;
3939    SpvMemorySemanticsMask semantics = 0;
3940    enum gl_access_qualifier access = 0;
3941 
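   /* Operand layout: for loads and read-modify-write atomics, w[1] is the
    * result type, w[2] the result id, w[3] the pointer, w[4] the scope, and
    * w[5] the semantics; OpAtomicStore and OpAtomicFlagClear have no result,
    * so the pointer is w[1], the scope w[2], and the semantics w[3].
    */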
3942    switch (opcode) {
3943    case SpvOpAtomicLoad:
3944    case SpvOpAtomicExchange:
3945    case SpvOpAtomicCompareExchange:
3946    case SpvOpAtomicCompareExchangeWeak:
3947    case SpvOpAtomicIIncrement:
3948    case SpvOpAtomicIDecrement:
3949    case SpvOpAtomicIAdd:
3950    case SpvOpAtomicISub:
3951    case SpvOpAtomicSMin:
3952    case SpvOpAtomicUMin:
3953    case SpvOpAtomicSMax:
3954    case SpvOpAtomicUMax:
3955    case SpvOpAtomicAnd:
3956    case SpvOpAtomicOr:
3957    case SpvOpAtomicXor:
3958    case SpvOpAtomicFAddEXT:
3959    case SpvOpAtomicFMinEXT:
3960    case SpvOpAtomicFMaxEXT:
3961    case SpvOpAtomicFlagTestAndSet:
3962       ptr = vtn_pointer(b, w[3]);
3963       scope = vtn_constant_uint(b, w[4]);
3964       semantics = vtn_constant_uint(b, w[5]);
3965       break;
3966    case SpvOpAtomicFlagClear:
3967    case SpvOpAtomicStore:
3968       ptr = vtn_pointer(b, w[1]);
3969       scope = vtn_constant_uint(b, w[2]);
3970       semantics = vtn_constant_uint(b, w[3]);
3971       break;
3972 
3973    default:
3974       vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3975    }
3976 
3977    if (semantics & SpvMemorySemanticsVolatileMask)
3978       access |= ACCESS_VOLATILE;
3979 
3980    /* Here, "uniform" means an atomic counter uniform. */
3981    if (ptr->mode == vtn_variable_mode_atomic_counter) {
3982       nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
3983       nir_intrinsic_op op = get_uniform_nir_atomic_op(b, opcode);
3984       atomic = nir_intrinsic_instr_create(b->nb.shader, op);
3985       atomic->src[0] = nir_src_for_ssa(&deref->def);
3986 
3987       /* SSBOs would need their index/offset initialized here, but atomic
3988        * counters don't: that info is already stored on the ptr->var->var
3989        * nir_variable (see vtn_create_variable).
3990        */
3991 
3992       switch (opcode) {
3993       case SpvOpAtomicLoad:
3994       case SpvOpAtomicExchange:
3995       case SpvOpAtomicCompareExchange:
3996       case SpvOpAtomicCompareExchangeWeak:
3997       case SpvOpAtomicIIncrement:
3998       case SpvOpAtomicIDecrement:
3999       case SpvOpAtomicIAdd:
4000       case SpvOpAtomicISub:
4001       case SpvOpAtomicSMin:
4002       case SpvOpAtomicUMin:
4003       case SpvOpAtomicSMax:
4004       case SpvOpAtomicUMax:
4005       case SpvOpAtomicAnd:
4006       case SpvOpAtomicOr:
4007       case SpvOpAtomicXor:
4008          /* Nothing to do: we don't need to call fill_common_atomic_sources
4009           * here, as atomic counter uniforms don't have sources.
4010           */
4011          break;
4012 
4013       default:
4014          unreachable("Invalid SPIR-V atomic");
4015 
4016       }
4017    } else {
4018       nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
4019       const struct glsl_type *deref_type = deref->type;
4020       nir_intrinsic_op op = get_deref_nir_atomic_op(b, opcode);
4021       atomic = nir_intrinsic_instr_create(b->nb.shader, op);
4022       atomic->src[0] = nir_src_for_ssa(&deref->def);
4023 
4024       if (nir_intrinsic_has_atomic_op(atomic))
4025          nir_intrinsic_set_atomic_op(atomic, translate_atomic_op(opcode));
4026 
4027       if (ptr->mode != vtn_variable_mode_workgroup)
4028          access |= ACCESS_COHERENT;
4029 
4030       nir_intrinsic_set_access(atomic, access);
4031 
4032       switch (opcode) {
4033       case SpvOpAtomicLoad:
4034          atomic->num_components = glsl_get_vector_elements(deref_type);
4035          break;
4036 
4037       case SpvOpAtomicStore:
4038          atomic->num_components = glsl_get_vector_elements(deref_type);
4039          nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
4040          atomic->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4]));
4041          break;
4042 
4043       case SpvOpAtomicFlagClear:
4044          atomic->num_components = 1;
4045          nir_intrinsic_set_write_mask(atomic, 1);
4046          atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
4047          break;
4048       case SpvOpAtomicFlagTestAndSet:
4049          atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
4050          atomic->src[2] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, 32));
4051          break;
4052       case SpvOpAtomicExchange:
4053       case SpvOpAtomicCompareExchange:
4054       case SpvOpAtomicCompareExchangeWeak:
4055       case SpvOpAtomicIIncrement:
4056       case SpvOpAtomicIDecrement:
4057       case SpvOpAtomicIAdd:
4058       case SpvOpAtomicISub:
4059       case SpvOpAtomicSMin:
4060       case SpvOpAtomicUMin:
4061       case SpvOpAtomicSMax:
4062       case SpvOpAtomicUMax:
4063       case SpvOpAtomicAnd:
4064       case SpvOpAtomicOr:
4065       case SpvOpAtomicXor:
4066       case SpvOpAtomicFAddEXT:
4067       case SpvOpAtomicFMinEXT:
4068       case SpvOpAtomicFMaxEXT:
4069          fill_common_atomic_sources(b, opcode, w, &atomic->src[1]);
4070          break;
4071 
4072       default:
4073          vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
4074       }
4075    }
4076 
4077    /* Atomic ordering operations will implicitly apply to the atomic operation
4078     * storage class, so include that too.
4079     */
4080    semantics |= vtn_mode_to_memory_semantics(ptr->mode);
4081 
4082    SpvMemorySemanticsMask before_semantics;
4083    SpvMemorySemanticsMask after_semantics;
4084    vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
4085 
4086    if (before_semantics)
4087       vtn_emit_memory_barrier(b, scope, before_semantics);
4088 
4089    if (opcode != SpvOpAtomicStore && opcode != SpvOpAtomicFlagClear) {
4090       struct vtn_type *type = vtn_get_type(b, w[1]);
4091 
4092       if (opcode == SpvOpAtomicFlagTestAndSet) {
4093          /* Map the atomic flag to a 32-bit atomic integer. */
4094          nir_def_init(&atomic->instr, &atomic->def, 1, 32);
4095       } else {
4096          nir_def_init(&atomic->instr, &atomic->def,
4097                       glsl_get_vector_elements(type->type),
4098                       glsl_get_bit_size(type->type));
4099 
4100          vtn_push_nir_ssa(b, w[2], &atomic->def);
4101       }
4102    }
4103 
4104    nir_builder_instr_insert(&b->nb, &atomic->instr);
4105 
4106    if (opcode == SpvOpAtomicFlagTestAndSet) {
4107       vtn_push_nir_ssa(b, w[2], nir_i2b(&b->nb, &atomic->def));
4108    }
4109    if (after_semantics)
4110       vtn_emit_memory_barrier(b, scope, after_semantics);
4111 }
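
/* Worked example (editor's sketch, not part of the original source): an
 * OpAtomicIAdd on an SSBO pointer with Device scope and
 * AcquireRelease | UniformMemory semantics lowers roughly to
 *
 *    barrier(Device, Release | UniformMemory);    <- before_semantics
 *    %r = deref_atomic(iadd, %deref, %val);       <- ACCESS_COHERENT set
 *    barrier(Device, Acquire | UniformMemory);    <- after_semantics
 *
 * vtn_split_barrier_semantics() is what places the Release half of
 * AcquireRelease before the atomic and the Acquire half after it.
 */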
4112 
4113 static nir_alu_instr *
4114 create_vec(struct vtn_builder *b, unsigned num_components, unsigned bit_size)
4115 {
4116    nir_op op = nir_op_vec(num_components);
4117    nir_alu_instr *vec = nir_alu_instr_create(b->shader, op);
4118    nir_def_init(&vec->instr, &vec->def, num_components, bit_size);
4119 
4120    return vec;
4121 }
4122 
4123 struct vtn_ssa_value *
4124 vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
4125 {
4126    if (src->transposed)
4127       return src->transposed;
4128 
4129    struct vtn_ssa_value *dest =
4130       vtn_create_ssa_value(b, glsl_transposed_type(src->type));
4131 
4132    for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
4133       if (glsl_type_is_vector_or_scalar(src->type)) {
4134          dest->elems[i]->def = nir_channel(&b->nb, src->def, i);
4135       } else {
4136          unsigned cols = glsl_get_matrix_columns(src->type);
4137          nir_scalar srcs[NIR_MAX_MATRIX_COLUMNS];
4138          for (unsigned j = 0; j < cols; j++) {
4139             srcs[j] = nir_get_scalar(src->elems[j]->def, i);
4140          }
4141          dest->elems[i]->def = nir_vec_scalars(&b->nb, srcs, cols);
4142       }
4143    }
4144 
4145    dest->transposed = src;
4146 
4147    return dest;
4148 }
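
/* Example (editor's illustration): transposing a 2-column, 3-row matrix
 * with columns c0 = (a, b, c) and c1 = (d, e, f) yields a 3-column,
 * 2-row matrix with columns (a, d), (b, e), (c, f): each destination
 * column i gathers component i of every source column via
 * nir_get_scalar()/nir_vec_scalars(). Caching src in dest->transposed
 * makes transposing the result back a no-op.
 */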
4149 
4150 static nir_def *
4151 vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
4152                    nir_def *src0, nir_def *src1,
4153                    const uint32_t *indices)
4154 {
4155    nir_alu_instr *vec = create_vec(b, num_components, src0->bit_size);
4156 
4157    for (unsigned i = 0; i < num_components; i++) {
4158       uint32_t index = indices[i];
4159       unsigned total_components = src0->num_components + src1->num_components;
4160       vtn_fail_if(index != 0xffffffff && index >= total_components,
4161                   "OpVectorShuffle: All Component literals must either be "
4162                   "FFFFFFFF or in [0, N - 1] (inclusive)");
4163 
4164       if (index == 0xffffffff) {
4165          vec->src[i].src =
4166             nir_src_for_ssa(nir_undef(&b->nb, 1, src0->bit_size));
4167       } else if (index < src0->num_components) {
4168          vec->src[i].src = nir_src_for_ssa(src0);
4169          vec->src[i].swizzle[0] = index;
4170       } else {
4171          vec->src[i].src = nir_src_for_ssa(src1);
4172          vec->src[i].swizzle[0] = index - src0->num_components;
4173       }
4174    }
4175 
4176    nir_builder_instr_insert(&b->nb, &vec->instr);
4177 
4178    return &vec->def;
4179 }
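
/* Example (editor's illustration): with src0 = (a, b) and
 * src1 = (c, d, e), Component literals (1, 3, 0xffffffff) produce
 * (b, d, undef): indices below src0->num_components select from src0,
 * larger ones select component index - src0->num_components from src1,
 * and 0xffffffff yields an undefined channel.
 */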
4180 
4181 /*
4182  * Concatenates a number of vectors/scalars together to produce a vector.
4183  */
4184 static nir_def *
4185 vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
4186                      unsigned num_srcs, nir_def **srcs)
4187 {
4188    nir_alu_instr *vec = create_vec(b, num_components, srcs[0]->bit_size);
4189 
4190    /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
4191     *
4192     *    "When constructing a vector, there must be at least two Constituent
4193     *    operands."
4194     */
4195    vtn_assert(num_srcs >= 2);
4196 
4197    unsigned dest_idx = 0;
4198    for (unsigned i = 0; i < num_srcs; i++) {
4199       nir_def *src = srcs[i];
4200       vtn_assert(dest_idx + src->num_components <= num_components);
4201       for (unsigned j = 0; j < src->num_components; j++) {
4202          vec->src[dest_idx].src = nir_src_for_ssa(src);
4203          vec->src[dest_idx].swizzle[0] = j;
4204          dest_idx++;
4205       }
4206    }
4207 
4208    /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
4209     *
4210     *    "When constructing a vector, the total number of components in all
4211     *    the operands must equal the number of components in Result Type."
4212     */
4213    vtn_assert(dest_idx == num_components);
4214 
4215    nir_builder_instr_insert(&b->nb, &vec->instr);
4216 
4217    return &vec->def;
4218 }
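
/* Example (editor's illustration): constructing a vec4 from the
 * constituents (vec2(a, b), c, d) emits a vec4 ALU op whose sources are
 * (src0.x, src0.y, src1.x, src2.x); dest_idx walks the result
 * components while each constituent contributes all of its components
 * in order.
 */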
4219 
4220 static struct vtn_ssa_value *
4221 vtn_composite_copy(struct vtn_builder *b, struct vtn_ssa_value *src)
4222 {
4223    assert(!src->is_variable);
4224 
4225    struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
4226    dest->type = src->type;
4227 
4228    if (glsl_type_is_vector_or_scalar(src->type)) {
4229       dest->def = src->def;
4230    } else {
4231       unsigned elems = glsl_get_length(src->type);
4232 
4233       dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
4234       for (unsigned i = 0; i < elems; i++)
4235          dest->elems[i] = vtn_composite_copy(b, src->elems[i]);
4236    }
4237 
4238    return dest;
4239 }
4240 
4241 static struct vtn_ssa_value *
4242 vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
4243                      struct vtn_ssa_value *insert, const uint32_t *indices,
4244                      unsigned num_indices)
4245 {
4246    if (glsl_type_is_cmat(src->type))
4247       return vtn_cooperative_matrix_insert(b, src, insert, indices, num_indices);
4248 
4249    struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
4250 
4251    struct vtn_ssa_value *cur = dest;
4252    unsigned i;
4253    for (i = 0; i < num_indices - 1; i++) {
4254       /* If we got a vector here, that means the next index will be trying to
4255        * dereference a scalar.
4256        */
4257       vtn_fail_if(glsl_type_is_vector_or_scalar(cur->type),
4258                   "OpCompositeInsert has too many indices.");
4259       vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4260                   "All indices in an OpCompositeInsert must be in-bounds");
4261       cur = cur->elems[indices[i]];
4262    }
4263 
4264    if (glsl_type_is_vector_or_scalar(cur->type)) {
4265       vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4266                   "All indices in an OpCompositeInsert must be in-bounds");
4267 
4268       /* According to the SPIR-V spec, OpCompositeInsert may work down to
4269        * component granularity. In that case, the last index is the vector
4270        * component at which to insert the scalar.
4271        */
4272 
4273       cur->def = nir_vector_insert_imm(&b->nb, cur->def, insert->def, indices[i]);
4274    } else {
4275       vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4276                   "All indices in an OpCompositeInsert must be in-bounds");
4277       cur->elems[indices[i]] = insert;
4278    }
4279 
4280    return dest;
4281 }
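
/* Example (editor's illustration): given a composite of type
 * struct { vec4 v; float f; }, OpCompositeInsert with Indexes (0, 2)
 * walks to elems[0] (the vec4) and, since that is a vector, replaces
 * component 2 with nir_vector_insert_imm(). All other NIR defs in the
 * copy made by vtn_composite_copy() remain shared with the original.
 */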
4282 
4283 static struct vtn_ssa_value *
4284 vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
4285                       const uint32_t *indices, unsigned num_indices)
4286 {
4287    if (glsl_type_is_cmat(src->type))
4288       return vtn_cooperative_matrix_extract(b, src, indices, num_indices);
4289 
4290    struct vtn_ssa_value *cur = src;
4291    for (unsigned i = 0; i < num_indices; i++) {
4292       if (glsl_type_is_vector_or_scalar(cur->type)) {
4293          vtn_assert(i == num_indices - 1);
4294          vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4295                      "All indices in an OpCompositeExtract must be in-bounds");
4296 
4297          /* According to the SPIR-V spec, OpCompositeExtract may work down
4298           * to component granularity. In that case, the last index is the
4299           * vector component to extract.
4300           */
4301 
4302          const struct glsl_type *scalar_type =
4303             glsl_scalar_type(glsl_get_base_type(cur->type));
4304          struct vtn_ssa_value *ret = vtn_create_ssa_value(b, scalar_type);
4305          ret->def = nir_channel(&b->nb, cur->def, indices[i]);
4306          return ret;
4307       } else {
4308          vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4309                      "All indices in an OpCompositeExtract must be in-bounds");
4310          cur = cur->elems[indices[i]];
4311       }
4312    }
4313 
4314    return cur;
4315 }
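
/* Example (editor's illustration): given a composite of type mat2 m[3],
 * OpCompositeExtract with Indexes (1, 0, 1) selects array element 1,
 * then column 0, and finally scalar component 1, the last step using
 * nir_channel() above.
 */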
4316 
4317 static void
4318 vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
4319                      const uint32_t *w, unsigned count)
4320 {
4321    struct vtn_type *type = vtn_get_type(b, w[1]);
4322    struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
4323 
4324    switch (opcode) {
4325    case SpvOpVectorExtractDynamic:
4326       ssa->def = nir_vector_extract(&b->nb, vtn_get_nir_ssa(b, w[3]),
4327                                     vtn_get_nir_ssa(b, w[4]));
4328       break;
4329 
4330    case SpvOpVectorInsertDynamic:
4331       ssa->def = nir_vector_insert(&b->nb, vtn_get_nir_ssa(b, w[3]),
4332                                    vtn_get_nir_ssa(b, w[4]),
4333                                    vtn_get_nir_ssa(b, w[5]));
4334       break;
4335 
4336    case SpvOpVectorShuffle:
4337       ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type),
4338                                     vtn_get_nir_ssa(b, w[3]),
4339                                     vtn_get_nir_ssa(b, w[4]),
4340                                     w + 5);
4341       break;
4342 
4343    case SpvOpCompositeConstruct: {
4344       unsigned elems = count - 3;
4345       assume(elems >= 1);
4346       if (type->base_type == vtn_base_type_cooperative_matrix) {
4347          vtn_assert(elems == 1);
4348          nir_deref_instr *mat = vtn_create_cmat_temporary(b, type->type, "cmat_construct");
4349          nir_cmat_construct(&b->nb, &mat->def, vtn_get_nir_ssa(b, w[3]));
4350          vtn_set_ssa_value_var(b, ssa, mat->var);
4351       } else if (glsl_type_is_vector_or_scalar(type->type)) {
4352          nir_def *srcs[NIR_MAX_VEC_COMPONENTS];
4353          for (unsigned i = 0; i < elems; i++) {
4354             srcs[i] = vtn_get_nir_ssa(b, w[3 + i]);
4355             vtn_assert(glsl_get_bit_size(type->type) == srcs[i]->bit_size);
4356          }
4357          ssa->def =
4358             vtn_vector_construct(b, glsl_get_vector_elements(type->type),
4359                                  elems, srcs);
4360       } else {
4361          ssa->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
4362          for (unsigned i = 0; i < elems; i++)
4363             ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
4364       }
4365       break;
4366    }
4367    case SpvOpCompositeExtract:
4368       ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
4369                                   w + 4, count - 4);
4370       break;
4371 
4372    case SpvOpCompositeInsert:
4373       ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
4374                                  vtn_ssa_value(b, w[3]),
4375                                  w + 5, count - 5);
4376       break;
4377 
4378    case SpvOpCopyLogical: {
4379       ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
4380       struct vtn_type *dst_type = vtn_get_value_type(b, w[2]);
4381       vtn_assert(vtn_types_compatible(b, type, dst_type));
4382       ssa->type = glsl_get_bare_type(dst_type->type);
4383       break;
4384    }
4385    case SpvOpCopyObject:
4386    case SpvOpExpectKHR:
4387       vtn_copy_value(b, w[3], w[2]);
4388       return;
4389 
4390    default:
4391       vtn_fail_with_opcode("unknown composite operation", opcode);
4392    }
4393 
4394    vtn_push_ssa_value(b, w[2], ssa);
4395 }
4396 
4397 static void
4398 vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
4399                    const uint32_t *w, UNUSED unsigned count)
4400 {
4401    switch (opcode) {
4402    case SpvOpEmitVertex:
4403    case SpvOpEmitStreamVertex:
4404    case SpvOpEndPrimitive:
4405    case SpvOpEndStreamPrimitive: {
4406       unsigned stream = 0;
4407       if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
4408          stream = vtn_constant_uint(b, w[1]);
4409 
4410       switch (opcode) {
4411       case SpvOpEmitStreamVertex:
4412       case SpvOpEmitVertex:
4413          nir_emit_vertex(&b->nb, stream);
4414          break;
4415       case SpvOpEndPrimitive:
4416       case SpvOpEndStreamPrimitive:
4417          nir_end_primitive(&b->nb, stream);
4418          break;
4419       default:
4420          unreachable("Invalid opcode");
4421       }
4422       break;
4423    }
4424 
4425    case SpvOpMemoryBarrier: {
4426       SpvScope scope = vtn_constant_uint(b, w[1]);
4427       SpvMemorySemanticsMask semantics = vtn_constant_uint(b, w[2]);
4428       vtn_emit_memory_barrier(b, scope, semantics);
4429       return;
4430    }
4431 
4432    case SpvOpControlBarrier: {
4433       SpvScope execution_scope = vtn_constant_uint(b, w[1]);
4434       SpvScope memory_scope = vtn_constant_uint(b, w[2]);
4435       SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]);
4436 
4437       /* GLSLang, prior to commit 8297936dd6eb3, emitted OpControlBarrier
4438        * with memory semantics of None for GLSL barrier(). Before that,
4439        * prior to c3f1cdfa, it also emitted OpControlBarrier with Device
4440        * instead of Workgroup for the execution scope.
4441        */
4442       if (b->wa_glslang_cs_barrier &&
4443           b->nb.shader->info.stage == MESA_SHADER_COMPUTE &&
4444           (execution_scope == SpvScopeWorkgroup ||
4445            execution_scope == SpvScopeDevice) &&
4446           memory_semantics == SpvMemorySemanticsMaskNone) {
4447          execution_scope = SpvScopeWorkgroup;
4448          memory_scope = SpvScopeWorkgroup;
4449          memory_semantics = SpvMemorySemanticsAcquireReleaseMask |
4450                             SpvMemorySemanticsWorkgroupMemoryMask;
4451       }
4452 
4453       /* From the SPIR-V spec:
4454        *
4455        *    "When used with the TessellationControl execution model, it also
4456        *    implicitly synchronizes the Output Storage Class: Writes to Output
4457        *    variables performed by any invocation executed prior to a
4458        *    OpControlBarrier will be visible to any other invocation after
4459        *    return from that OpControlBarrier."
4460        *
4461        * The same applies to VK_NV_mesh_shader.
4462        */
4463       if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL ||
4464           b->nb.shader->info.stage == MESA_SHADER_TASK ||
4465           b->nb.shader->info.stage == MESA_SHADER_MESH) {
4466          memory_semantics &= ~(SpvMemorySemanticsAcquireMask |
4467                                SpvMemorySemanticsReleaseMask |
4468                                SpvMemorySemanticsAcquireReleaseMask |
4469                                SpvMemorySemanticsSequentiallyConsistentMask);
4470          memory_semantics |= SpvMemorySemanticsAcquireReleaseMask |
4471                              SpvMemorySemanticsOutputMemoryMask;
4472       }
4473 
4474       vtn_emit_scoped_control_barrier(b, execution_scope, memory_scope,
4475                                       memory_semantics);
4476       break;
4477    }
4478 
4479    default:
4480       unreachable("unknown barrier instruction");
4481    }
4482 }
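
/* Editor's note: as a concrete instance of the workaround above (result
 * IDs illustrative), an old-glslang GLSL barrier() arrives as
 *
 *    OpControlBarrier %workgroup %workgroup %none
 *
 * and is handled as if the module had contained
 *
 *    OpControlBarrier %workgroup %workgroup %acqrel_wg_memory
 *
 * i.e. AcquireRelease | WorkgroupMemory, matching the shared-memory
 * ordering that GLSL's barrier() is expected to provide.
 */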
4483 
4484 static enum tess_primitive_mode
4485 tess_primitive_mode_from_spv_execution_mode(struct vtn_builder *b,
4486                                             SpvExecutionMode mode)
4487 {
4488    switch (mode) {
4489    case SpvExecutionModeTriangles:
4490       return TESS_PRIMITIVE_TRIANGLES;
4491    case SpvExecutionModeQuads:
4492       return TESS_PRIMITIVE_QUADS;
4493    case SpvExecutionModeIsolines:
4494       return TESS_PRIMITIVE_ISOLINES;
4495    default:
4496       vtn_fail("Invalid tess primitive type: %s (%u)",
4497                spirv_executionmode_to_string(mode), mode);
4498    }
4499 }
4500 
4501 static enum mesa_prim
4502 primitive_from_spv_execution_mode(struct vtn_builder *b,
4503                                   SpvExecutionMode mode)
4504 {
4505    switch (mode) {
4506    case SpvExecutionModeInputPoints:
4507    case SpvExecutionModeOutputPoints:
4508       return MESA_PRIM_POINTS;
4509    case SpvExecutionModeInputLines:
4510    case SpvExecutionModeOutputLinesNV:
4511       return MESA_PRIM_LINES;
4512    case SpvExecutionModeInputLinesAdjacency:
4513       return MESA_PRIM_LINES_ADJACENCY;
4514    case SpvExecutionModeTriangles:
4515    case SpvExecutionModeOutputTrianglesNV:
4516       return MESA_PRIM_TRIANGLES;
4517    case SpvExecutionModeInputTrianglesAdjacency:
4518       return MESA_PRIM_TRIANGLES_ADJACENCY;
4519    case SpvExecutionModeQuads:
4520       return MESA_PRIM_QUADS;
4521    case SpvExecutionModeOutputLineStrip:
4522       return MESA_PRIM_LINE_STRIP;
4523    case SpvExecutionModeOutputTriangleStrip:
4524       return MESA_PRIM_TRIANGLE_STRIP;
4525    default:
4526       vtn_fail("Invalid primitive type: %s (%u)",
4527                spirv_executionmode_to_string(mode), mode);
4528    }
4529 }
4530 
4531 static unsigned
4532 vertices_in_from_spv_execution_mode(struct vtn_builder *b,
4533                                     SpvExecutionMode mode)
4534 {
4535    switch (mode) {
4536    case SpvExecutionModeInputPoints:
4537       return 1;
4538    case SpvExecutionModeInputLines:
4539       return 2;
4540    case SpvExecutionModeInputLinesAdjacency:
4541       return 4;
4542    case SpvExecutionModeTriangles:
4543       return 3;
4544    case SpvExecutionModeInputTrianglesAdjacency:
4545       return 6;
4546    default:
4547       vtn_fail("Invalid GS input mode: %s (%u)",
4548                spirv_executionmode_to_string(mode), mode);
4549    }
4550 }
4551 
4552 gl_shader_stage
4553 vtn_stage_for_execution_model(SpvExecutionModel model)
4554 {
4555    switch (model) {
4556    case SpvExecutionModelVertex:
4557       return MESA_SHADER_VERTEX;
4558    case SpvExecutionModelTessellationControl:
4559       return MESA_SHADER_TESS_CTRL;
4560    case SpvExecutionModelTessellationEvaluation:
4561       return MESA_SHADER_TESS_EVAL;
4562    case SpvExecutionModelGeometry:
4563       return MESA_SHADER_GEOMETRY;
4564    case SpvExecutionModelFragment:
4565       return MESA_SHADER_FRAGMENT;
4566    case SpvExecutionModelGLCompute:
4567       return MESA_SHADER_COMPUTE;
4568    case SpvExecutionModelKernel:
4569       return MESA_SHADER_KERNEL;
4570    case SpvExecutionModelRayGenerationKHR:
4571       return MESA_SHADER_RAYGEN;
4572    case SpvExecutionModelAnyHitKHR:
4573       return MESA_SHADER_ANY_HIT;
4574    case SpvExecutionModelClosestHitKHR:
4575       return MESA_SHADER_CLOSEST_HIT;
4576    case SpvExecutionModelMissKHR:
4577       return MESA_SHADER_MISS;
4578    case SpvExecutionModelIntersectionKHR:
4579       return MESA_SHADER_INTERSECTION;
4580    case SpvExecutionModelCallableKHR:
4581        return MESA_SHADER_CALLABLE;
4582    case SpvExecutionModelTaskNV:
4583    case SpvExecutionModelTaskEXT:
4584       return MESA_SHADER_TASK;
4585    case SpvExecutionModelMeshNV:
4586    case SpvExecutionModelMeshEXT:
4587       return MESA_SHADER_MESH;
4588    default:
4589       return MESA_SHADER_NONE;
4590    }
4591 }
4592 
4593 #define spv_check_supported(name, cap) do {                 \
4594       if (!(b->options && b->options->caps.name))           \
4595          vtn_warn("Unsupported SPIR-V capability: %s (%u)", \
4596                   spirv_capability_to_string(cap), cap);    \
4597    } while(0)
4598 
4599 
4600 void
4601 vtn_handle_entry_point(struct vtn_builder *b, const uint32_t *w,
4602                        unsigned count)
4603 {
4604    struct vtn_value *entry_point = &b->values[w[2]];
4605    /* Record the name on the value even if this isn't the entry point we use. */
4606    unsigned name_words;
4607    entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
4608 
4609    gl_shader_stage stage = vtn_stage_for_execution_model(w[1]);
4610    vtn_fail_if(stage == MESA_SHADER_NONE,
4611                "Unsupported execution model: %s (%u)",
4612                spirv_executionmodel_to_string(w[1]), w[1]);
4613    if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
4614        stage != b->entry_point_stage)
4615       return;
4616 
4617    vtn_assert(b->entry_point == NULL);
4618    b->entry_point = entry_point;
4619 
4620    /* Entry points enumerate which global variables are used. */
4621    size_t start = 3 + name_words;
4622    b->interface_ids_count = count - start;
4623    b->interface_ids = vtn_alloc_array(b, uint32_t, b->interface_ids_count);
4624    memcpy(b->interface_ids, &w[start], b->interface_ids_count * 4);
4625    qsort(b->interface_ids, b->interface_ids_count, 4, cmp_uint32_t);
4626 }
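
/* Editor's sketch of the word layout consumed above (IDs hypothetical):
 *
 *    OpEntryPoint Fragment %main_fn "main" %in_color %out_color
 *
 * w[1] = execution model, w[2] = entry point id, w[3..] = the name
 * literal ("main", spanning name_words words), and the remaining words
 * are the interface ids copied into b->interface_ids.
 */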
4627 
4628 static bool
4629 vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
4630                                 const uint32_t *w, unsigned count)
4631 {
4632    switch (opcode) {
4633    case SpvOpString:
4634    case SpvOpSource:
4635    case SpvOpSourceExtension:
4636    case SpvOpSourceContinued:
4637    case SpvOpModuleProcessed:
4638       vtn_handle_debug_text(b, opcode, w, count);
4639       break;
4640 
4641    case SpvOpExtension: {
4642       /* Implementing both NV_mesh_shader and EXT_mesh_shader
4643        * is difficult without knowing which we're dealing with.
4644        * TODO: remove this when we stop supporting NV_mesh_shader.
4645        */
4646       const char *ext_name = (const char *)&w[1];
4647       if (strcmp(ext_name, "SPV_NV_mesh_shader") == 0)
4648          b->shader->info.mesh.nv = true;
4649       break;
4650    }
4651 
4652    case SpvOpCapability: {
4653       SpvCapability cap = w[1];
4654       switch (cap) {
4655       case SpvCapabilityMatrix:
4656       case SpvCapabilityShader:
4657       case SpvCapabilityGeometry:
4658       case SpvCapabilityGeometryPointSize:
4659       case SpvCapabilityUniformBufferArrayDynamicIndexing:
4660       case SpvCapabilitySampledImageArrayDynamicIndexing:
4661       case SpvCapabilityStorageBufferArrayDynamicIndexing:
4662       case SpvCapabilityStorageImageArrayDynamicIndexing:
4663       case SpvCapabilityImageRect:
4664       case SpvCapabilitySampledRect:
4665       case SpvCapabilitySampled1D:
4666       case SpvCapabilityImage1D:
4667       case SpvCapabilitySampledCubeArray:
4668       case SpvCapabilityImageCubeArray:
4669       case SpvCapabilitySampledBuffer:
4670       case SpvCapabilityImageBuffer:
4671       case SpvCapabilityImageQuery:
4672       case SpvCapabilityDerivativeControl:
4673       case SpvCapabilityInterpolationFunction:
4674       case SpvCapabilityMultiViewport:
4675       case SpvCapabilitySampleRateShading:
4676       case SpvCapabilityClipDistance:
4677       case SpvCapabilityCullDistance:
4678       case SpvCapabilityInputAttachment:
4679       case SpvCapabilityImageGatherExtended:
4680       case SpvCapabilityStorageImageExtendedFormats:
4681       case SpvCapabilityVector16:
4682       case SpvCapabilityDotProduct:
4683       case SpvCapabilityDotProductInputAll:
4684       case SpvCapabilityDotProductInput4x8Bit:
4685       case SpvCapabilityDotProductInput4x8BitPacked:
4686       case SpvCapabilityExpectAssumeKHR:
4687          break;
4688 
4689       case SpvCapabilityLinkage:
4690          if (!b->options->create_library)
4691             vtn_warn("Unsupported SPIR-V capability: %s",
4692                      spirv_capability_to_string(cap));
4693          spv_check_supported(linkage, cap);
4694          break;
4695 
4696       case SpvCapabilitySparseResidency:
4697          spv_check_supported(sparse_residency, cap);
4698          break;
4699 
4700       case SpvCapabilityMinLod:
4701          spv_check_supported(min_lod, cap);
4702          break;
4703 
4704       case SpvCapabilityAtomicStorage:
4705          spv_check_supported(atomic_storage, cap);
4706          break;
4707 
4708       case SpvCapabilityFloat64:
4709          spv_check_supported(float64, cap);
4710          break;
4711       case SpvCapabilityInt64:
4712          spv_check_supported(int64, cap);
4713          break;
4714       case SpvCapabilityInt16:
4715          spv_check_supported(int16, cap);
4716          break;
4717       case SpvCapabilityInt8:
4718          spv_check_supported(int8, cap);
4719          break;
4720 
4721       case SpvCapabilityTransformFeedback:
4722          spv_check_supported(transform_feedback, cap);
4723          break;
4724 
4725       case SpvCapabilityGeometryStreams:
4726          spv_check_supported(geometry_streams, cap);
4727          break;
4728 
4729       case SpvCapabilityInt64Atomics:
4730          spv_check_supported(int64_atomics, cap);
4731          break;
4732 
4733       case SpvCapabilityStorageImageMultisample:
4734          spv_check_supported(storage_image_ms, cap);
4735          break;
4736 
4737       case SpvCapabilityAddresses:
4738          spv_check_supported(address, cap);
4739          break;
4740 
4741       case SpvCapabilityKernel:
4742       case SpvCapabilityFloat16Buffer:
4743          spv_check_supported(kernel, cap);
4744          break;
4745 
4746       case SpvCapabilityGenericPointer:
4747          spv_check_supported(generic_pointers, cap);
4748          break;
4749 
4750       case SpvCapabilityImageBasic:
4751          spv_check_supported(kernel_image, cap);
4752          break;
4753 
4754       case SpvCapabilityImageReadWrite:
4755          spv_check_supported(kernel_image_read_write, cap);
4756          break;
4757 
4758       case SpvCapabilityLiteralSampler:
4759          spv_check_supported(literal_sampler, cap);
4760          break;
4761 
4762       case SpvCapabilityImageMipmap:
4763       case SpvCapabilityPipes:
4764       case SpvCapabilityDeviceEnqueue:
4765          vtn_warn("Unsupported OpenCL-style SPIR-V capability: %s",
4766                   spirv_capability_to_string(cap));
4767          break;
4768 
4769       case SpvCapabilityImageMSArray:
4770          spv_check_supported(image_ms_array, cap);
4771          break;
4772 
4773       case SpvCapabilityTessellation:
4774       case SpvCapabilityTessellationPointSize:
4775          spv_check_supported(tessellation, cap);
4776          break;
4777 
4778       case SpvCapabilityDrawParameters:
4779          spv_check_supported(draw_parameters, cap);
4780          break;
4781 
4782       case SpvCapabilityStorageImageReadWithoutFormat:
4783          spv_check_supported(image_read_without_format, cap);
4784          break;
4785 
4786       case SpvCapabilityStorageImageWriteWithoutFormat:
4787          spv_check_supported(image_write_without_format, cap);
4788          break;
4789 
4790       case SpvCapabilityDeviceGroup:
4791          spv_check_supported(device_group, cap);
4792          break;
4793 
4794       case SpvCapabilityMultiView:
4795          spv_check_supported(multiview, cap);
4796          break;
4797 
4798       case SpvCapabilityGroupNonUniform:
4799          spv_check_supported(subgroup_basic, cap);
4800          break;
4801 
4802       case SpvCapabilitySubgroupVoteKHR:
4803       case SpvCapabilityGroupNonUniformVote:
4804          spv_check_supported(subgroup_vote, cap);
4805          break;
4806 
4807       case SpvCapabilitySubgroupBallotKHR:
4808       case SpvCapabilityGroupNonUniformBallot:
4809          spv_check_supported(subgroup_ballot, cap);
4810          break;
4811 
4812       case SpvCapabilityGroupNonUniformShuffle:
4813       case SpvCapabilityGroupNonUniformShuffleRelative:
4814          spv_check_supported(subgroup_shuffle, cap);
4815          break;
4816 
4817       case SpvCapabilityGroupNonUniformQuad:
4818          spv_check_supported(subgroup_quad, cap);
4819          break;
4820 
4821       case SpvCapabilityGroupNonUniformArithmetic:
4822       case SpvCapabilityGroupNonUniformClustered:
4823          spv_check_supported(subgroup_arithmetic, cap);
4824          break;
4825 
4826       case SpvCapabilityGroups:
4827          spv_check_supported(groups, cap);
4828          break;
4829 
4830       case SpvCapabilitySubgroupDispatch:
4831          spv_check_supported(subgroup_dispatch, cap);
4832          /* Missing:
4833           *   - SpvOpGetKernelLocalSizeForSubgroupCount
4834           *   - SpvOpGetKernelMaxNumSubgroups
4835           */
4836          vtn_warn("Not fully supported capability: %s",
4837                   spirv_capability_to_string(cap));
4838          break;
4839 
4840       case SpvCapabilityVariablePointersStorageBuffer:
4841       case SpvCapabilityVariablePointers:
4842          spv_check_supported(variable_pointers, cap);
4843          b->variable_pointers = true;
4844          break;
4845 
4846       case SpvCapabilityStorageUniformBufferBlock16:
4847       case SpvCapabilityStorageUniform16:
4848       case SpvCapabilityStoragePushConstant16:
4849       case SpvCapabilityStorageInputOutput16:
4850          spv_check_supported(storage_16bit, cap);
4851          break;
4852 
4853       case SpvCapabilityShaderLayer:
4854       case SpvCapabilityShaderViewportIndex:
4855       case SpvCapabilityShaderViewportIndexLayerEXT:
4856          spv_check_supported(shader_viewport_index_layer, cap);
4857          break;
4858 
4859       case SpvCapabilityStorageBuffer8BitAccess:
4860       case SpvCapabilityUniformAndStorageBuffer8BitAccess:
4861       case SpvCapabilityStoragePushConstant8:
4862          spv_check_supported(storage_8bit, cap);
4863          break;
4864 
4865       case SpvCapabilityShaderNonUniformEXT:
4866          spv_check_supported(descriptor_indexing, cap);
4867          break;
4868 
4869       case SpvCapabilityInputAttachmentArrayDynamicIndexingEXT:
4870       case SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT:
4871       case SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT:
4872          spv_check_supported(descriptor_array_dynamic_indexing, cap);
4873          break;
4874 
4875       case SpvCapabilityUniformBufferArrayNonUniformIndexingEXT:
4876       case SpvCapabilitySampledImageArrayNonUniformIndexingEXT:
4877       case SpvCapabilityStorageBufferArrayNonUniformIndexingEXT:
4878       case SpvCapabilityStorageImageArrayNonUniformIndexingEXT:
4879       case SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT:
4880       case SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT:
4881       case SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT:
4882          spv_check_supported(descriptor_array_non_uniform_indexing, cap);
4883          break;
4884 
4885       case SpvCapabilityRuntimeDescriptorArrayEXT:
4886          spv_check_supported(runtime_descriptor_array, cap);
4887          break;
4888 
4889       case SpvCapabilityStencilExportEXT:
4890          spv_check_supported(stencil_export, cap);
4891          break;
4892 
4893       case SpvCapabilitySampleMaskPostDepthCoverage:
4894          spv_check_supported(post_depth_coverage, cap);
4895          break;
4896 
4897       case SpvCapabilityDenormFlushToZero:
4898       case SpvCapabilityDenormPreserve:
4899       case SpvCapabilitySignedZeroInfNanPreserve:
4900       case SpvCapabilityRoundingModeRTE:
4901       case SpvCapabilityRoundingModeRTZ:
4902          spv_check_supported(float_controls, cap);
4903          break;
4904 
4905       case SpvCapabilityPhysicalStorageBufferAddresses:
4906          spv_check_supported(physical_storage_buffer_address, cap);
4907          break;
4908 
4909       case SpvCapabilityComputeDerivativeGroupQuadsNV:
4910       case SpvCapabilityComputeDerivativeGroupLinearNV:
4911          spv_check_supported(derivative_group, cap);
4912          break;
4913 
4914       case SpvCapabilityFloat16:
4915          spv_check_supported(float16, cap);
4916          break;
4917 
4918       case SpvCapabilityFragmentShaderSampleInterlockEXT:
4919          spv_check_supported(fragment_shader_sample_interlock, cap);
4920          break;
4921 
4922       case SpvCapabilityFragmentShaderPixelInterlockEXT:
4923          spv_check_supported(fragment_shader_pixel_interlock, cap);
4924          break;
4925 
4926       case SpvCapabilityShaderSMBuiltinsNV:
4927          spv_check_supported(shader_sm_builtins_nv, cap);
4928          break;
4929 
4930       case SpvCapabilityDemoteToHelperInvocation:
4931          spv_check_supported(demote_to_helper_invocation, cap);
4932          b->uses_demote_to_helper_invocation = true;
4933          break;
4934 
4935       case SpvCapabilityShaderClockKHR:
4936          spv_check_supported(shader_clock, cap);
4937          break;
4938 
4939       case SpvCapabilityVulkanMemoryModel:
4940          spv_check_supported(vk_memory_model, cap);
4941          break;
4942 
4943       case SpvCapabilityVulkanMemoryModelDeviceScope:
4944          spv_check_supported(vk_memory_model_device_scope, cap);
4945          break;
4946 
4947       case SpvCapabilityImageReadWriteLodAMD:
4948          spv_check_supported(amd_image_read_write_lod, cap);
4949          break;
4950 
4951       case SpvCapabilityIntegerFunctions2INTEL:
4952          spv_check_supported(integer_functions2, cap);
4953          break;
4954 
4955       case SpvCapabilityFragmentMaskAMD:
4956          spv_check_supported(amd_fragment_mask, cap);
4957          break;
4958 
4959       case SpvCapabilityImageGatherBiasLodAMD:
4960          spv_check_supported(amd_image_gather_bias_lod, cap);
4961          b->image_gather_bias_lod = true;
4962          break;
4963 
4964       case SpvCapabilityAtomicFloat16AddEXT:
4965          spv_check_supported(float16_atomic_add, cap);
4966          break;
4967 
4968       case SpvCapabilityAtomicFloat32AddEXT:
4969          spv_check_supported(float32_atomic_add, cap);
4970          break;
4971 
4972       case SpvCapabilityAtomicFloat64AddEXT:
4973          spv_check_supported(float64_atomic_add, cap);
4974          break;
4975 
4976       case SpvCapabilitySubgroupShuffleINTEL:
4977          spv_check_supported(intel_subgroup_shuffle, cap);
4978          break;
4979 
4980       case SpvCapabilitySubgroupBufferBlockIOINTEL:
4981          spv_check_supported(intel_subgroup_buffer_block_io, cap);
4982          break;
4983 
4984       case SpvCapabilityRayCullMaskKHR:
4985          spv_check_supported(ray_cull_mask, cap);
4986          break;
4987 
4988       case SpvCapabilityRayTracingKHR:
4989          spv_check_supported(ray_tracing, cap);
4990          break;
4991 
4992       case SpvCapabilityRayQueryKHR:
4993          spv_check_supported(ray_query, cap);
4994          break;
4995 
4996       case SpvCapabilityRayTraversalPrimitiveCullingKHR:
4997          spv_check_supported(ray_traversal_primitive_culling, cap);
4998          break;
4999 
5000       case SpvCapabilityInt64ImageEXT:
5001          spv_check_supported(image_atomic_int64, cap);
5002          break;
5003 
5004       case SpvCapabilityFragmentShadingRateKHR:
5005          spv_check_supported(fragment_shading_rate, cap);
5006          break;
5007 
5008       case SpvCapabilityWorkgroupMemoryExplicitLayoutKHR:
5009          spv_check_supported(workgroup_memory_explicit_layout, cap);
5010          break;
5011 
5012       case SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR:
5013          spv_check_supported(workgroup_memory_explicit_layout, cap);
5014          spv_check_supported(storage_8bit, cap);
5015          break;
5016 
5017       case SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR:
5018          spv_check_supported(workgroup_memory_explicit_layout, cap);
5019          spv_check_supported(storage_16bit, cap);
5020          break;
5021 
5022       case SpvCapabilityAtomicFloat16MinMaxEXT:
5023          spv_check_supported(float16_atomic_min_max, cap);
5024          break;
5025 
5026       case SpvCapabilityAtomicFloat32MinMaxEXT:
5027          spv_check_supported(float32_atomic_min_max, cap);
5028          break;
5029 
5030       case SpvCapabilityAtomicFloat64MinMaxEXT:
5031          spv_check_supported(float64_atomic_min_max, cap);
5032          break;
5033 
5034       case SpvCapabilityMeshShadingEXT:
5035          spv_check_supported(mesh_shading, cap);
5036          break;
5037 
5038       case SpvCapabilityMeshShadingNV:
5039          spv_check_supported(mesh_shading_nv, cap);
5040          break;
5041 
5042       case SpvCapabilityPerViewAttributesNV:
5043          spv_check_supported(per_view_attributes_nv, cap);
5044          break;
5045 
5046       case SpvCapabilityShaderViewportMaskNV:
5047          spv_check_supported(shader_viewport_mask_nv, cap);
5048          break;
5049 
5050       case SpvCapabilityGroupNonUniformRotateKHR:
5051          spv_check_supported(subgroup_rotate, cap);
5052          break;
5053 
5054       case SpvCapabilityFragmentFullyCoveredEXT:
5055          spv_check_supported(fragment_fully_covered, cap);
5056          break;
5057 
5058       case SpvCapabilityFragmentDensityEXT:
5059          spv_check_supported(fragment_density, cap);
5060          break;
5061 
5062       case SpvCapabilityRayTracingPositionFetchKHR:
5063       case SpvCapabilityRayQueryPositionFetchKHR:
5064          spv_check_supported(ray_tracing_position_fetch, cap);
5065          break;
5066 
5067       case SpvCapabilityFragmentBarycentricKHR:
5068          spv_check_supported(fragment_barycentric, cap);
5069          break;
5070 
5071       case SpvCapabilityShaderEnqueueAMDX:
5072          spv_check_supported(shader_enqueue, cap);
5073          break;
5074 
5075       case SpvCapabilityCooperativeMatrixKHR:
5076          spv_check_supported(cooperative_matrix, cap);
5077          break;
5078 
5079       case SpvCapabilityQuadControlKHR:
5080          spv_check_supported(quad_control, cap);
5081          break;
5082 
5083       default:
5084          vtn_fail("Unhandled capability: %s (%u)",
5085                   spirv_capability_to_string(cap), cap);
5086       }
5087       break;
5088    }
5089 
5090    case SpvOpExtInstImport:
5091       vtn_handle_extension(b, opcode, w, count);
5092       break;
5093 
5094    case SpvOpMemoryModel:
5095       switch (w[1]) {
5096       case SpvAddressingModelPhysical32:
5097          vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
5098                      "AddressingModelPhysical32 only supported for kernels");
5099          b->shader->info.cs.ptr_size = 32;
5100          b->physical_ptrs = true;
5101          assert(nir_address_format_bit_size(b->options->global_addr_format) == 32);
5102          assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
5103          assert(nir_address_format_bit_size(b->options->shared_addr_format) == 32);
5104          assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
5105          assert(nir_address_format_bit_size(b->options->constant_addr_format) == 32);
5106          assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
5107          break;
5108       case SpvAddressingModelPhysical64:
5109          vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
5110                      "AddressingModelPhysical64 only supported for kernels");
5111          b->shader->info.cs.ptr_size = 64;
5112          b->physical_ptrs = true;
5113          assert(nir_address_format_bit_size(b->options->global_addr_format) == 64);
5114          assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
5115          assert(nir_address_format_bit_size(b->options->shared_addr_format) == 64);
5116          assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
5117          assert(nir_address_format_bit_size(b->options->constant_addr_format) == 64);
5118          assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
5119          break;
5120       case SpvAddressingModelLogical:
5121          vtn_fail_if(b->shader->info.stage == MESA_SHADER_KERNEL,
5122                      "AddressingModelLogical only supported for shaders");
5123          b->physical_ptrs = false;
5124          break;
5125       case SpvAddressingModelPhysicalStorageBuffer64:
5126          vtn_fail_if(!b->options ||
5127                      !b->options->caps.physical_storage_buffer_address,
5128                      "AddressingModelPhysicalStorageBuffer64 not supported");
5129          break;
5130       default:
5131          vtn_fail("Unknown addressing model: %s (%u)",
5132                   spirv_addressingmodel_to_string(w[1]), w[1]);
5133          break;
5134       }
5135 
5136       b->mem_model = w[2];
5137       switch (w[2]) {
5138       case SpvMemoryModelSimple:
5139       case SpvMemoryModelGLSL450:
5140       case SpvMemoryModelOpenCL:
5141          break;
5142       case SpvMemoryModelVulkan:
5143          vtn_fail_if(!b->options->caps.vk_memory_model,
5144                      "Vulkan memory model is unsupported by this driver");
5145          break;
5146       default:
5147          vtn_fail("Unsupported memory model: %s",
5148                   spirv_memorymodel_to_string(w[2]));
5149          break;
5150       }
5151       break;
5152 
5153    case SpvOpEntryPoint:
5154       vtn_handle_entry_point(b, w, count);
5155       break;
5156 
5157    case SpvOpName:
5158       b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
5159       break;
5160 
5161    case SpvOpMemberName:
5162    case SpvOpExecutionMode:
5163    case SpvOpExecutionModeId:
5164    case SpvOpDecorationGroup:
5165    case SpvOpDecorate:
5166    case SpvOpDecorateId:
5167    case SpvOpMemberDecorate:
5168    case SpvOpGroupDecorate:
5169    case SpvOpGroupMemberDecorate:
5170    case SpvOpDecorateString:
5171    case SpvOpMemberDecorateString:
5172       vtn_handle_decoration(b, opcode, w, count);
5173       break;
5174 
5175    case SpvOpExtInst: {
5176       struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5177       if (val->ext_handler == vtn_handle_non_semantic_instruction) {
5178          /* NonSemantic extended instructions are acceptable in the preamble. */
5179          vtn_handle_non_semantic_instruction(b, w[4], w, count);
5180          return true;
5181       } else {
5182          return false; /* End of preamble. */
5183       }
5184    }
5185 
5186    default:
5187       return false; /* End of preamble */
5188    }
5189 
5190    return true;
5191 }
5192 
5193 void
5194 vtn_handle_debug_text(struct vtn_builder *b, SpvOp opcode,
5195                       const uint32_t *w, unsigned count)
5196 {
5197    switch (opcode) {
5198    case SpvOpString:
5199       vtn_push_value(b, w[1], vtn_value_type_string)->str =
5200          vtn_string_literal(b, &w[2], count - 2, NULL);
5201       break;
5202 
5203    case SpvOpSource: {
5204       const char *lang;
5205       switch (w[1]) {
5206       default:
5207       case SpvSourceLanguageUnknown:      lang = "unknown";    break;
5208       case SpvSourceLanguageESSL:         lang = "ESSL";       break;
5209       case SpvSourceLanguageGLSL:         lang = "GLSL";       break;
5210       case SpvSourceLanguageOpenCL_C:     lang = "OpenCL C";   break;
5211       case SpvSourceLanguageOpenCL_CPP:   lang = "OpenCL C++"; break;
5212       case SpvSourceLanguageHLSL:         lang = "HLSL";       break;
5213       }
5214 
5215       uint32_t version = w[2];
5216 
5217       const char *file =
5218          (count > 3) ? vtn_value(b, w[3], vtn_value_type_string)->str : "";
5219 
5220       vtn_info("Parsing SPIR-V from %s %u source file %s", lang, version, file);
5221 
5222       b->source_lang = w[1];
5223       break;
5224    }
5225 
5226    case SpvOpSourceExtension:
5227    case SpvOpSourceContinued:
5228    case SpvOpModuleProcessed:
5229       /* Unhandled, but these are for debug so that's ok. */
5230       break;
5231 
5232    default:
5233       unreachable("Unhandled opcode");
5234    }
5235 }
5236 
5237 static void
5238 vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
5239                           const struct vtn_decoration *mode, UNUSED void *data)
5240 {
5241    vtn_assert(b->entry_point == entry_point);
5242 
5243    switch(mode->exec_mode) {
5244    case SpvExecutionModeOriginUpperLeft:
5245    case SpvExecutionModeOriginLowerLeft:
5246       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5247       b->shader->info.fs.origin_upper_left =
5248          (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
5249       break;
5250 
5251    case SpvExecutionModeEarlyFragmentTests:
5252       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5253       b->shader->info.fs.early_fragment_tests = true;
5254       break;
5255 
5256    case SpvExecutionModePostDepthCoverage:
5257       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5258       b->shader->info.fs.post_depth_coverage = true;
5259       break;
5260 
5261    case SpvExecutionModeInvocations:
5262       vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5263       b->shader->info.gs.invocations = MAX2(1, mode->operands[0]);
5264       break;
5265 
5266    case SpvExecutionModeDepthReplacing:
5267       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5268       if (b->shader->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE)
5269          b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
5270       break;
5271    case SpvExecutionModeDepthGreater:
5272       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5273       b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
5274       break;
5275    case SpvExecutionModeDepthLess:
5276       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5277       b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
5278       break;
5279    case SpvExecutionModeDepthUnchanged:
5280       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5281       b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
5282       break;
5283 
5284    case SpvExecutionModeLocalSizeHint:
5285       vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5286       b->shader->info.cs.workgroup_size_hint[0] = mode->operands[0];
5287       b->shader->info.cs.workgroup_size_hint[1] = mode->operands[1];
5288       b->shader->info.cs.workgroup_size_hint[2] = mode->operands[2];
5289       break;
5290 
5291    case SpvExecutionModeLocalSize:
5292       if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5293          b->shader->info.workgroup_size[0] = mode->operands[0];
5294          b->shader->info.workgroup_size[1] = mode->operands[1];
5295          b->shader->info.workgroup_size[2] = mode->operands[2];
5296       } else {
5297          vtn_fail("Execution mode LocalSize not supported in stage %s",
5298                   _mesa_shader_stage_to_string(b->shader->info.stage));
5299       }
5300       break;
5301 
5302    case SpvExecutionModeOutputVertices:
5303       switch (b->shader->info.stage) {
5304       case MESA_SHADER_TESS_CTRL:
5305       case MESA_SHADER_TESS_EVAL:
5306          b->shader->info.tess.tcs_vertices_out = mode->operands[0];
5307          break;
5308       case MESA_SHADER_GEOMETRY:
5309          b->shader->info.gs.vertices_out = mode->operands[0];
5310          break;
5311       case MESA_SHADER_MESH:
5312          b->shader->info.mesh.max_vertices_out = mode->operands[0];
5313          break;
5314       default:
5315          vtn_fail("Execution mode OutputVertices not supported in stage %s",
5316                   _mesa_shader_stage_to_string(b->shader->info.stage));
5317          break;
5318       }
5319       break;
5320 
5321    case SpvExecutionModeInputPoints:
5322    case SpvExecutionModeInputLines:
5323    case SpvExecutionModeInputLinesAdjacency:
5324    case SpvExecutionModeTriangles:
5325    case SpvExecutionModeInputTrianglesAdjacency:
5326    case SpvExecutionModeQuads:
5327    case SpvExecutionModeIsolines:
5328       if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5329           b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
5330          b->shader->info.tess._primitive_mode =
5331             tess_primitive_mode_from_spv_execution_mode(b, mode->exec_mode);
5332       } else {
5333          vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5334          b->shader->info.gs.vertices_in =
5335             vertices_in_from_spv_execution_mode(b, mode->exec_mode);
5336          b->shader->info.gs.input_primitive =
5337             primitive_from_spv_execution_mode(b, mode->exec_mode);
5338       }
5339       break;
5340 
5341    case SpvExecutionModeOutputPrimitivesNV:
5342       vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5343       b->shader->info.mesh.max_primitives_out = mode->operands[0];
5344       break;
5345 
5346    case SpvExecutionModeOutputLinesNV:
5347    case SpvExecutionModeOutputTrianglesNV:
5348       vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5349       b->shader->info.mesh.primitive_type =
5350          primitive_from_spv_execution_mode(b, mode->exec_mode);
5351       break;
5352 
5353    case SpvExecutionModeOutputPoints: {
5354       const unsigned primitive =
5355          primitive_from_spv_execution_mode(b, mode->exec_mode);
5356 
5357       switch (b->shader->info.stage) {
5358       case MESA_SHADER_GEOMETRY:
5359          b->shader->info.gs.output_primitive = primitive;
5360          break;
5361       case MESA_SHADER_MESH:
5362          b->shader->info.mesh.primitive_type = primitive;
5363          break;
5364       default:
5365          vtn_fail("Execution mode OutputPoints not supported in stage %s",
5366                   _mesa_shader_stage_to_string(b->shader->info.stage));
5367          break;
5368       }
5369       break;
5370    }
5371 
5372    case SpvExecutionModeOutputLineStrip:
5373    case SpvExecutionModeOutputTriangleStrip:
5374       vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5375       b->shader->info.gs.output_primitive =
5376          primitive_from_spv_execution_mode(b, mode->exec_mode);
5377       break;
5378 
5379    case SpvExecutionModeSpacingEqual:
5380       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5381                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5382       b->shader->info.tess.spacing = TESS_SPACING_EQUAL;
5383       break;
5384    case SpvExecutionModeSpacingFractionalEven:
5385       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5386                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5387       b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_EVEN;
5388       break;
5389    case SpvExecutionModeSpacingFractionalOdd:
5390       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5391                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5392       b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_ODD;
5393       break;
5394    case SpvExecutionModeVertexOrderCw:
5395       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5396                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5397       b->shader->info.tess.ccw = false;
5398       break;
5399    case SpvExecutionModeVertexOrderCcw:
5400       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5401                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5402       b->shader->info.tess.ccw = true;
5403       break;
5404    case SpvExecutionModePointMode:
5405       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5406                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5407       b->shader->info.tess.point_mode = true;
5408       break;
5409 
5410    case SpvExecutionModePixelCenterInteger:
5411       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5412       b->shader->info.fs.pixel_center_integer = true;
5413       break;
5414 
5415    case SpvExecutionModeXfb:
5416       b->shader->info.has_transform_feedback_varyings = true;
5417       break;
5418 
5419    case SpvExecutionModeVecTypeHint:
5420       break; /* OpenCL */
5421 
5422    case SpvExecutionModeContractionOff:
5423       if (b->shader->info.stage != MESA_SHADER_KERNEL)
5424          vtn_warn("ExectionMode only allowed for CL-style kernels: %s",
5425                   spirv_executionmode_to_string(mode->exec_mode));
5426       else
5427          b->exact = true;
5428       break;
5429 
5430    case SpvExecutionModeStencilRefReplacingEXT:
5431       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5432       break;
5433 
5434    case SpvExecutionModeDerivativeGroupQuadsNV:
5435       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5436       b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_QUADS;
5437       break;
5438 
5439    case SpvExecutionModeDerivativeGroupLinearNV:
5440       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5441       b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_LINEAR;
5442       break;
5443 
5444    case SpvExecutionModePixelInterlockOrderedEXT:
5445       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5446       b->shader->info.fs.pixel_interlock_ordered = true;
5447       break;
5448 
5449    case SpvExecutionModePixelInterlockUnorderedEXT:
5450       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5451       b->shader->info.fs.pixel_interlock_unordered = true;
5452       break;
5453 
5454    case SpvExecutionModeSampleInterlockOrderedEXT:
5455       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5456       b->shader->info.fs.sample_interlock_ordered = true;
5457       break;
5458 
5459    case SpvExecutionModeSampleInterlockUnorderedEXT:
5460       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5461       b->shader->info.fs.sample_interlock_unordered = true;
5462       break;
5463 
5464    case SpvExecutionModeDenormPreserve:
5465    case SpvExecutionModeDenormFlushToZero:
5466    case SpvExecutionModeSignedZeroInfNanPreserve:
5467    case SpvExecutionModeRoundingModeRTE:
5468    case SpvExecutionModeRoundingModeRTZ: {
5469       unsigned execution_mode = 0;
5470       switch (mode->exec_mode) {
5471       case SpvExecutionModeDenormPreserve:
5472          switch (mode->operands[0]) {
5473          case 16: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP16; break;
5474          case 32: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP32; break;
5475          case 64: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP64; break;
5476          default: vtn_fail("Floating point type not supported");
5477          }
5478          break;
5479       case SpvExecutionModeDenormFlushToZero:
5480          switch (mode->operands[0]) {
5481          case 16: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16; break;
5482          case 32: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32; break;
5483          case 64: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64; break;
5484          default: vtn_fail("Floating point type not supported");
5485          }
5486          break;
5487       case SpvExecutionModeSignedZeroInfNanPreserve:
5488          switch (mode->operands[0]) {
5489          case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break;
5490          case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break;
5491          case 64: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break;
5492          default: vtn_fail("Floating point type not supported");
5493          }
5494          break;
5495       case SpvExecutionModeRoundingModeRTE:
5496          switch (mode->operands[0]) {
5497          case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16; break;
5498          case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32; break;
5499          case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64; break;
5500          default: vtn_fail("Floating point type not supported");
5501          }
5502          break;
5503       case SpvExecutionModeRoundingModeRTZ:
5504          switch (mode->operands[0]) {
5505          case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16; break;
5506          case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32; break;
5507          case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; break;
5508          default: vtn_fail("Floating point type not supported");
5509          }
5510          break;
5511       default:
5512          break;
5513       }
5514 
5515       b->shader->info.float_controls_execution_mode |= execution_mode;
5516 
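      /* Reject combinations the modes above cannot express together: for any
       * given bit size, denormals cannot be both flushed and preserved, and
       * the rounding mode cannot be both RTNE and RTZ.
       */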
5517       for (unsigned bit_size = 16; bit_size <= 64; bit_size *= 2) {
5518          vtn_fail_if(nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, bit_size) &&
5519                      nir_is_denorm_preserve(b->shader->info.float_controls_execution_mode, bit_size),
5520                      "Cannot flush to zero and preserve denorms for the same bit size.");
5521          vtn_fail_if(nir_is_rounding_mode_rtne(b->shader->info.float_controls_execution_mode, bit_size) &&
5522                      nir_is_rounding_mode_rtz(b->shader->info.float_controls_execution_mode, bit_size),
5523                      "Cannot set rounding mode to RTNE and RTZ for the same bit size.");
5524       }
5525       break;
5526    }
5527 
5528    case SpvExecutionModeMaximallyReconvergesKHR:
5529       b->shader->info.maximally_reconverges = true;
5530       break;
5531 
5532    case SpvExecutionModeLocalSizeId:
5533    case SpvExecutionModeLocalSizeHintId:
5534    case SpvExecutionModeSubgroupsPerWorkgroupId:
5535    case SpvExecutionModeMaxNodeRecursionAMDX:
5536    case SpvExecutionModeStaticNumWorkgroupsAMDX:
5537    case SpvExecutionModeMaxNumWorkgroupsAMDX:
5538    case SpvExecutionModeShaderIndexAMDX:
5539       /* Handled later by vtn_handle_execution_mode_id(). */
5540       break;
5541 
5542    case SpvExecutionModeSubgroupSize:
5543       vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5544       vtn_assert(b->shader->info.subgroup_size == SUBGROUP_SIZE_VARYING);
5545       b->shader->info.subgroup_size = mode->operands[0];
5546       break;
5547 
5548    case SpvExecutionModeSubgroupsPerWorkgroup:
5549       vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5550       b->shader->info.num_subgroups = mode->operands[0];
5551       break;
5552 
5553    case SpvExecutionModeSubgroupUniformControlFlowKHR:
5554       /* There's no corresponding SPIR-V capability, so check here. */
5555       vtn_fail_if(!b->options->caps.subgroup_uniform_control_flow,
5556                   "SpvExecutionModeSubgroupUniformControlFlowKHR not supported.");
5557       break;
5558 
5559    case SpvExecutionModeEarlyAndLateFragmentTestsAMD:
5560       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5561       b->shader->info.fs.early_and_late_fragment_tests = true;
5562       break;
5563 
5564    case SpvExecutionModeStencilRefGreaterFrontAMD:
5565       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5566       b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_GREATER;
5567       break;
5568 
5569    case SpvExecutionModeStencilRefLessFrontAMD:
5570       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5571       b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_LESS;
5572       break;
5573 
5574    case SpvExecutionModeStencilRefUnchangedFrontAMD:
5575       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5576       b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_UNCHANGED;
5577       break;
5578 
5579    case SpvExecutionModeStencilRefGreaterBackAMD:
5580       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5581       b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_GREATER;
5582       break;
5583 
5584    case SpvExecutionModeStencilRefLessBackAMD:
5585       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5586       b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_LESS;
5587       break;
5588 
5589    case SpvExecutionModeStencilRefUnchangedBackAMD:
5590       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5591       b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_UNCHANGED;
5592       break;
5593 
5594    case SpvExecutionModeRequireFullQuadsKHR:
5595       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5596       b->shader->info.fs.require_full_quads = true;
5597       break;
5598 
5599    case SpvExecutionModeQuadDerivativesKHR:
5600       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5601       b->shader->info.fs.quad_derivatives = true;
5602       break;
5603 
5604    case SpvExecutionModeCoalescingAMDX:
5605       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5606       b->shader->info.cs.workgroup_count[0] = 1;
5607       b->shader->info.cs.workgroup_count[1] = 1;
5608       b->shader->info.cs.workgroup_count[2] = 1;
5609       break;
5610 
5611    default:
5612       vtn_fail("Unhandled execution mode: %s (%u)",
5613                spirv_executionmode_to_string(mode->exec_mode),
5614                mode->exec_mode);
5615    }
5616 }
5617 
5618 static void
5619 vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_point,
5620                              const struct vtn_decoration *mode, UNUSED void *data)
5621 {
5623    vtn_assert(b->entry_point == entry_point);
5624 
5625    switch (mode->exec_mode) {
5626    case SpvExecutionModeLocalSizeId:
5627       if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5628          b->shader->info.workgroup_size[0] = vtn_constant_uint(b, mode->operands[0]);
5629          b->shader->info.workgroup_size[1] = vtn_constant_uint(b, mode->operands[1]);
5630          b->shader->info.workgroup_size[2] = vtn_constant_uint(b, mode->operands[2]);
5631       } else {
5632          vtn_fail("Execution mode LocalSizeId not supported in stage %s",
5633                   _mesa_shader_stage_to_string(b->shader->info.stage));
5634       }
5635       break;
5636 
5637    case SpvExecutionModeLocalSizeHintId:
5638       vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5639       b->shader->info.cs.workgroup_size_hint[0] = vtn_constant_uint(b, mode->operands[0]);
5640       b->shader->info.cs.workgroup_size_hint[1] = vtn_constant_uint(b, mode->operands[1]);
5641       b->shader->info.cs.workgroup_size_hint[2] = vtn_constant_uint(b, mode->operands[2]);
5642       break;
5643 
5644    case SpvExecutionModeSubgroupsPerWorkgroupId:
5645       vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5646       b->shader->info.num_subgroups = vtn_constant_uint(b, mode->operands[0]);
5647       break;
5648 
5649    case SpvExecutionModeMaxNodeRecursionAMDX:
5650       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5651       break;
5652 
5653    case SpvExecutionModeStaticNumWorkgroupsAMDX:
5654       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5655       b->shader->info.cs.workgroup_count[0] = vtn_constant_uint(b, mode->operands[0]);
5656       b->shader->info.cs.workgroup_count[1] = vtn_constant_uint(b, mode->operands[1]);
5657       b->shader->info.cs.workgroup_count[2] = vtn_constant_uint(b, mode->operands[2]);
5658       assert(b->shader->info.cs.workgroup_count[0]);
5659       assert(b->shader->info.cs.workgroup_count[1]);
5660       assert(b->shader->info.cs.workgroup_count[2]);
5661       break;
5662 
5663    case SpvExecutionModeMaxNumWorkgroupsAMDX:
5664       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5665       break;
5666 
5667    case SpvExecutionModeShaderIndexAMDX:
5668       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5669       b->shader->info.cs.shader_index = vtn_constant_uint(b, mode->operands[0]);
5670       break;
5671 
5672    default:
5673       /* Nothing to do.  Literal execution modes already handled by
5674        * vtn_handle_execution_mode(). */
5675       break;
5676    }
5677 }
5678 
5679 static bool
5680 vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
5681                                         const uint32_t *w, unsigned count)
5682 {
5683    vtn_set_instruction_result_type(b, opcode, w, count);
5684 
5685    switch (opcode) {
5686    case SpvOpSource:
5687    case SpvOpSourceContinued:
5688    case SpvOpSourceExtension:
5689    case SpvOpExtension:
5690    case SpvOpCapability:
5691    case SpvOpExtInstImport:
5692    case SpvOpMemoryModel:
5693    case SpvOpEntryPoint:
5694    case SpvOpExecutionMode:
5695    case SpvOpString:
5696    case SpvOpName:
5697    case SpvOpMemberName:
5698    case SpvOpDecorationGroup:
5699    case SpvOpDecorate:
5700    case SpvOpDecorateId:
5701    case SpvOpMemberDecorate:
5702    case SpvOpGroupDecorate:
5703    case SpvOpGroupMemberDecorate:
5704    case SpvOpDecorateString:
5705    case SpvOpMemberDecorateString:
5706       vtn_fail("Invalid opcode in types and variables section");
5707       break;
5708 
5709    case SpvOpTypeVoid:
5710    case SpvOpTypeBool:
5711    case SpvOpTypeInt:
5712    case SpvOpTypeFloat:
5713    case SpvOpTypeVector:
5714    case SpvOpTypeMatrix:
5715    case SpvOpTypeImage:
5716    case SpvOpTypeSampler:
5717    case SpvOpTypeSampledImage:
5718    case SpvOpTypeArray:
5719    case SpvOpTypeRuntimeArray:
5720    case SpvOpTypeStruct:
5721    case SpvOpTypeOpaque:
5722    case SpvOpTypePointer:
5723    case SpvOpTypeForwardPointer:
5724    case SpvOpTypeFunction:
5725    case SpvOpTypeEvent:
5726    case SpvOpTypeDeviceEvent:
5727    case SpvOpTypeReserveId:
5728    case SpvOpTypeQueue:
5729    case SpvOpTypePipe:
5730    case SpvOpTypeAccelerationStructureKHR:
5731    case SpvOpTypeRayQueryKHR:
5732    case SpvOpTypeCooperativeMatrixKHR:
5733       vtn_handle_type(b, opcode, w, count);
5734       break;
5735 
5736    case SpvOpConstantTrue:
5737    case SpvOpConstantFalse:
5738    case SpvOpConstant:
5739    case SpvOpConstantComposite:
5740    case SpvOpConstantNull:
5741    case SpvOpSpecConstantTrue:
5742    case SpvOpSpecConstantFalse:
5743    case SpvOpSpecConstant:
5744    case SpvOpSpecConstantComposite:
5745    case SpvOpSpecConstantOp:
5746       vtn_handle_constant(b, opcode, w, count);
5747       break;
5748 
5749    case SpvOpUndef:
5750    case SpvOpVariable:
5751    case SpvOpConstantSampler:
5752       vtn_handle_variables(b, opcode, w, count);
5753       break;
5754 
5755    case SpvOpExtInst: {
5756       struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5757       /* NonSemantic extended instructions are acceptable in the preamble;
5758        * any other extended instruction marks the end of the preamble.
5759        */
5760       return val->ext_handler == vtn_handle_non_semantic_instruction;
5761    }
5762 
5763    default:
5764       return false; /* End of preamble */
5765    }
5766 
5767    return true;
5768 }
5769 
5770 static struct vtn_ssa_value *
5771 vtn_nir_select(struct vtn_builder *b, struct vtn_ssa_value *src0,
5772                struct vtn_ssa_value *src1, struct vtn_ssa_value *src2)
5773 {
5774    struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
5775    dest->type = src1->type;
5776 
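   /* Variable-backed values cannot go through nir_bcsel; emit an if/else
    * that copies whichever source is selected into a fresh local variable.
    */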
5777    if (src1->is_variable || src2->is_variable) {
5778       vtn_assert(src1->is_variable && src2->is_variable);
5779 
5780       nir_variable *dest_var =
5781          nir_local_variable_create(b->nb.impl, dest->type, "var_select");
5782       nir_deref_instr *dest_deref = nir_build_deref_var(&b->nb, dest_var);
5783 
5784       nir_push_if(&b->nb, src0->def);
5785       {
5786          nir_deref_instr *src1_deref = vtn_get_deref_for_ssa_value(b, src1);
5787          vtn_local_store(b, vtn_local_load(b, src1_deref, 0), dest_deref, 0);
5788       }
5789       nir_push_else(&b->nb, NULL);
5790       {
5791          nir_deref_instr *src2_deref = vtn_get_deref_for_ssa_value(b, src2);
5792          vtn_local_store(b, vtn_local_load(b, src2_deref, 0), dest_deref, 0);
5793       }
5794       nir_pop_if(&b->nb, NULL);
5795 
5796       vtn_set_ssa_value_var(b, dest, dest_var);
5797    } else if (glsl_type_is_vector_or_scalar(src1->type)) {
5798       dest->def = nir_bcsel(&b->nb, src0->def, src1->def, src2->def);
5799    } else {
5800       unsigned elems = glsl_get_length(src1->type);
5801 
5802       dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
5803       for (unsigned i = 0; i < elems; i++) {
5804          dest->elems[i] = vtn_nir_select(b, src0,
5805                                          src1->elems[i], src2->elems[i]);
5806       }
5807    }
5808 
5809    return dest;
5810 }
5811 
5812 static void
5813 vtn_handle_select(struct vtn_builder *b, SpvOp opcode,
5814                   const uint32_t *w, unsigned count)
5815 {
5816    /* Handle OpSelect up-front here because it needs to be able to handle
5817     * pointers and not just regular vectors and scalars.
5818     */
5819    struct vtn_value *res_val = vtn_untyped_value(b, w[2]);
5820    struct vtn_value *cond_val = vtn_untyped_value(b, w[3]);
5821    struct vtn_value *obj1_val = vtn_untyped_value(b, w[4]);
5822    struct vtn_value *obj2_val = vtn_untyped_value(b, w[5]);
5823 
5824    vtn_fail_if(obj1_val->type != res_val->type ||
5825                obj2_val->type != res_val->type,
5826                "Object types must match the result type in OpSelect (%%%u = %%%u ? %%%u : %%%u)", w[2], w[3], w[4], w[5]);
5827 
5828    vtn_fail_if((cond_val->type->base_type != vtn_base_type_scalar &&
5829                 cond_val->type->base_type != vtn_base_type_vector) ||
5830                !glsl_type_is_boolean(cond_val->type->type),
5831                "OpSelect must have either a vector of booleans or "
5832                "a boolean as Condition type");
5833 
5834    vtn_fail_if(cond_val->type->base_type == vtn_base_type_vector &&
5835                (res_val->type->base_type != vtn_base_type_vector ||
5836                 res_val->type->length != cond_val->type->length),
5837                "When Condition type in OpSelect is a vector, the Result "
5838                "type must be a vector of the same length");
5839 
5840    switch (res_val->type->base_type) {
5841    case vtn_base_type_scalar:
5842    case vtn_base_type_vector:
5843    case vtn_base_type_matrix:
5844    case vtn_base_type_array:
5845    case vtn_base_type_struct:
5846       /* OK. */
5847       break;
5848    case vtn_base_type_pointer:
5849       /* We need to have actual storage for pointer types. */
5850       vtn_fail_if(res_val->type->type == NULL,
5851                   "Invalid pointer result type for OpSelect");
5852       break;
5853    default:
5854       vtn_fail("Result type of OpSelect must be a scalar, composite, or pointer");
5855    }
5856 
5857    vtn_push_ssa_value(b, w[2],
5858       vtn_nir_select(b, vtn_ssa_value(b, w[3]),
5859                         vtn_ssa_value(b, w[4]),
5860                         vtn_ssa_value(b, w[5])));
5861 }
5862 
5863 static void
5864 vtn_handle_ptr(struct vtn_builder *b, SpvOp opcode,
5865                const uint32_t *w, unsigned count)
5866 {
5867    struct vtn_type *type1 = vtn_get_value_type(b, w[3]);
5868    struct vtn_type *type2 = vtn_get_value_type(b, w[4]);
5869    vtn_fail_if(type1->base_type != vtn_base_type_pointer ||
5870                type2->base_type != vtn_base_type_pointer,
5871                "%s operands must have pointer types",
5872                spirv_op_to_string(opcode));
5873    vtn_fail_if(type1->storage_class != type2->storage_class,
5874                "%s operands must have the same storage class",
5875                spirv_op_to_string(opcode));
5876 
5877    struct vtn_type *vtn_type = vtn_get_type(b, w[1]);
5878    const struct glsl_type *type = vtn_type->type;
5879 
5880    nir_address_format addr_format = vtn_mode_to_address_format(
5881       b, vtn_storage_class_to_mode(b, type1->storage_class, NULL, NULL));
5882 
5883    nir_def *def;
5884 
5885    switch (opcode) {
5886    case SpvOpPtrDiff: {
5887       /* OpPtrDiff returns the difference in number of elements (not byte offset). */
5888       unsigned elem_size, elem_align;
5889       glsl_get_natural_size_align_bytes(type1->deref->type,
5890                                         &elem_size, &elem_align);
5891 
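      /* Subtract the raw addresses, divide by the element size to get an
       * element count, then convert to the bit size of the result type.
       */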
5892       def = nir_build_addr_isub(&b->nb,
5893                                 vtn_get_nir_ssa(b, w[3]),
5894                                 vtn_get_nir_ssa(b, w[4]),
5895                                 addr_format);
5896       def = nir_idiv(&b->nb, def, nir_imm_intN_t(&b->nb, elem_size, def->bit_size));
5897       def = nir_i2iN(&b->nb, def, glsl_get_bit_size(type));
5898       break;
5899    }
5900 
5901    case SpvOpPtrEqual:
5902    case SpvOpPtrNotEqual: {
5903       def = nir_build_addr_ieq(&b->nb,
5904                                vtn_get_nir_ssa(b, w[3]),
5905                                vtn_get_nir_ssa(b, w[4]),
5906                                addr_format);
5907       if (opcode == SpvOpPtrNotEqual)
5908          def = nir_inot(&b->nb, def);
5909       break;
5910    }
5911 
5912    default:
5913       unreachable("Invalid ptr operation");
5914    }
5915 
5916    vtn_push_nir_ssa(b, w[2], def);
5917 }
5918 
5919 static void
5920 vtn_handle_ray_intrinsic(struct vtn_builder *b, SpvOp opcode,
5921                          const uint32_t *w, unsigned count)
5922 {
5923    nir_intrinsic_instr *intrin;
5924 
5925    switch (opcode) {
5926    case SpvOpTraceNV:
5927    case SpvOpTraceRayKHR: {
5928       intrin = nir_intrinsic_instr_create(b->nb.shader,
5929                                           nir_intrinsic_trace_ray);
5930 
5931       /* The sources are in the same order in the NIR intrinsic */
5932       for (unsigned i = 0; i < 10; i++)
5933          intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
5934 
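      /* The NV variant identifies the payload by its location, while the
       * KHR variant passes a pointer to it.
       */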
5935       nir_deref_instr *payload;
5936       if (opcode == SpvOpTraceNV)
5937          payload = vtn_get_call_payload_for_location(b, w[11]);
5938       else
5939          payload = vtn_nir_deref(b, w[11]);
5940       intrin->src[10] = nir_src_for_ssa(&payload->def);
5941       nir_builder_instr_insert(&b->nb, &intrin->instr);
5942       break;
5943    }
5944 
5945    case SpvOpReportIntersectionKHR: {
5946       intrin = nir_intrinsic_instr_create(b->nb.shader,
5947                                           nir_intrinsic_report_ray_intersection);
5948       intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
5949       intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
5950       nir_def_init(&intrin->instr, &intrin->def, 1, 1);
5951       nir_builder_instr_insert(&b->nb, &intrin->instr);
5952       vtn_push_nir_ssa(b, w[2], &intrin->def);
5953       break;
5954    }
5955 
5956    case SpvOpIgnoreIntersectionNV:
5957       intrin = nir_intrinsic_instr_create(b->nb.shader,
5958                                           nir_intrinsic_ignore_ray_intersection);
5959       nir_builder_instr_insert(&b->nb, &intrin->instr);
5960       break;
5961 
5962    case SpvOpTerminateRayNV:
5963       intrin = nir_intrinsic_instr_create(b->nb.shader,
5964                                           nir_intrinsic_terminate_ray);
5965       nir_builder_instr_insert(&b->nb, &intrin->instr);
5966       break;
5967 
5968    case SpvOpExecuteCallableNV:
5969    case SpvOpExecuteCallableKHR: {
5970       intrin = nir_intrinsic_instr_create(b->nb.shader,
5971                                           nir_intrinsic_execute_callable);
5972       intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
5973       nir_deref_instr *payload;
5974       if (opcode == SpvOpExecuteCallableNV)
5975          payload = vtn_get_call_payload_for_location(b, w[2]);
5976       else
5977          payload = vtn_nir_deref(b, w[2]);
5978       intrin->src[1] = nir_src_for_ssa(&payload->def);
5979       nir_builder_instr_insert(&b->nb, &intrin->instr);
5980       break;
5981    }
5982 
5983    default:
5984       vtn_fail_with_opcode("Unhandled opcode", opcode);
5985    }
5986 }
5987 
5988 static void
5989 vtn_handle_write_packed_primitive_indices(struct vtn_builder *b, SpvOp opcode,
5990                                           const uint32_t *w, unsigned count)
5991 {
5992    vtn_assert(opcode == SpvOpWritePackedPrimitiveIndices4x8NV);
5993 
5994    /* TODO(mesh): Use or create a primitive that allows the unpacking to
5995     * happen in the backend.  What we have here is functional but too
5996     * blunt.
5997     */
5998 
5999    struct vtn_type *offset_type = vtn_get_value_type(b, w[1]);
6000    vtn_fail_if(offset_type->base_type != vtn_base_type_scalar ||
6001                offset_type->type != glsl_uint_type(),
6002                "Index Offset type of OpWritePackedPrimitiveIndices4x8NV "
6003                "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
6004 
6005    struct vtn_type *packed_type = vtn_get_value_type(b, w[2]);
6006    vtn_fail_if(packed_type->base_type != vtn_base_type_scalar ||
6007                packed_type->type != glsl_uint_type(),
6008                "Packed Indices type of OpWritePackedPrimitiveIndices4x8NV "
6009                "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
6010 
6011    nir_deref_instr *indices = NULL;
6012    nir_foreach_variable_with_modes(var, b->nb.shader, nir_var_shader_out) {
6013       if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) {
6014          indices = nir_build_deref_var(&b->nb, var);
6015          break;
6016       }
6017    }
6018 
6019    /* It may be the case that the variable is not present in the
6020     * entry point interface list.
6021     *
6022     * See https://github.com/KhronosGroup/SPIRV-Registry/issues/104.
6023     */
6024 
6025    if (!indices) {
6026       unsigned vertices_per_prim =
6027          mesa_vertices_per_prim(b->shader->info.mesh.primitive_type);
6028       unsigned max_prim_indices =
6029          vertices_per_prim * b->shader->info.mesh.max_primitives_out;
6030       const struct glsl_type *t =
6031          glsl_array_type(glsl_uint_type(), max_prim_indices, 0);
6032       nir_variable *var =
6033          nir_variable_create(b->shader, nir_var_shader_out, t,
6034                              "gl_PrimitiveIndicesNV");
6035 
6036       var->data.location = VARYING_SLOT_PRIMITIVE_INDICES;
6037       var->data.interpolation = INTERP_MODE_NONE;
6038       indices = nir_build_deref_var(&b->nb, var);
6039    }
6040 
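   /* Split the packed 32-bit word into four 8-bit values and store each one
    * at gl_PrimitiveIndicesNV[Index Offset + i].
    */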
6041    nir_def *offset = vtn_get_nir_ssa(b, w[1]);
6042    nir_def *packed = vtn_get_nir_ssa(b, w[2]);
6043    nir_def *unpacked = nir_unpack_bits(&b->nb, packed, 8);
6044    for (int i = 0; i < 4; i++) {
6045       nir_deref_instr *offset_deref =
6046          nir_build_deref_array(&b->nb, indices,
6047                                nir_iadd_imm(&b->nb, offset, i));
6048       nir_def *val = nir_u2u32(&b->nb, nir_channel(&b->nb, unpacked, i));
6049 
6050       nir_store_deref(&b->nb, offset_deref, val, 0x1);
6051    }
6052 }
6053 
6054 struct ray_query_value {
6055    nir_ray_query_value     nir_value;
6056    const struct glsl_type *glsl_type;
6057 };
6058 
6059 static struct ray_query_value
6060 spirv_to_nir_type_ray_query_intrinsic(struct vtn_builder *b,
6061                                       SpvOp opcode)
6062 {
6063    switch (opcode) {
6064 #define CASE(_spv, _nir, _type) case SpvOpRayQueryGet##_spv:            \
6065       return (struct ray_query_value) { .nir_value = nir_ray_query_value_##_nir, .glsl_type = _type }
6066       CASE(RayTMinKHR,                                            tmin,                                   glsl_floatN_t_type(32));
6067       CASE(RayFlagsKHR,                                           flags,                                  glsl_uint_type());
6068       CASE(WorldRayDirectionKHR,                                  world_ray_direction,                    glsl_vec_type(3));
6069       CASE(WorldRayOriginKHR,                                     world_ray_origin,                       glsl_vec_type(3));
6070       CASE(IntersectionTypeKHR,                                   intersection_type,                      glsl_uint_type());
6071       CASE(IntersectionTKHR,                                      intersection_t,                         glsl_floatN_t_type(32));
6072       CASE(IntersectionInstanceCustomIndexKHR,                    intersection_instance_custom_index,     glsl_int_type());
6073       CASE(IntersectionInstanceIdKHR,                             intersection_instance_id,               glsl_int_type());
6074       CASE(IntersectionInstanceShaderBindingTableRecordOffsetKHR, intersection_instance_sbt_index,        glsl_uint_type());
6075       CASE(IntersectionGeometryIndexKHR,                          intersection_geometry_index,            glsl_int_type());
6076       CASE(IntersectionPrimitiveIndexKHR,                         intersection_primitive_index,           glsl_int_type());
6077       CASE(IntersectionBarycentricsKHR,                           intersection_barycentrics,              glsl_vec_type(2));
6078       CASE(IntersectionFrontFaceKHR,                              intersection_front_face,                glsl_bool_type());
6079       CASE(IntersectionCandidateAABBOpaqueKHR,                    intersection_candidate_aabb_opaque,     glsl_bool_type());
6080       CASE(IntersectionObjectToWorldKHR,                          intersection_object_to_world,           glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
6081       CASE(IntersectionWorldToObjectKHR,                          intersection_world_to_object,           glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
6082       CASE(IntersectionObjectRayOriginKHR,                        intersection_object_ray_origin,         glsl_vec_type(3));
6083       CASE(IntersectionObjectRayDirectionKHR,                     intersection_object_ray_direction,      glsl_vec_type(3));
6084       CASE(IntersectionTriangleVertexPositionsKHR,                intersection_triangle_vertex_positions, glsl_array_type(glsl_vec_type(3), 3,
6085                                                                                                                           glsl_get_explicit_stride(glsl_vec_type(3))));
6086 #undef CASE
6087    default:
6088       vtn_fail_with_opcode("Unhandled opcode", opcode);
6089    }
6090 }
6091 
6092 static void
6093 ray_query_load_intrinsic_create(struct vtn_builder *b, SpvOp opcode,
6094                                 const uint32_t *w, nir_def *src0,
6095                                 bool committed)
6096 {
6097    struct ray_query_value value =
6098       spirv_to_nir_type_ray_query_intrinsic(b, opcode);
6099 
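   /* Matrix and array results come back from the ray query one column (or
    * element) at a time, so issue one load per element.
    */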
6100    if (glsl_type_is_array_or_matrix(value.glsl_type)) {
6101       const struct glsl_type *elem_type = glsl_get_array_element(value.glsl_type);
6102       const unsigned elems = glsl_get_length(value.glsl_type);
6103 
6104       struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, value.glsl_type);
6105       for (unsigned i = 0; i < elems; i++) {
6106          ssa->elems[i]->def =
6107             nir_rq_load(&b->nb,
6108                         glsl_get_vector_elements(elem_type),
6109                         glsl_get_bit_size(elem_type),
6110                         src0,
6111                         .ray_query_value = value.nir_value,
6112                         .committed = committed,
6113                         .column = i);
6114       }
6115 
6116       vtn_push_ssa_value(b, w[2], ssa);
6117    } else {
6118       assert(glsl_type_is_vector_or_scalar(value.glsl_type));
6119 
6120       vtn_push_nir_ssa(b, w[2],
6121                        nir_rq_load(&b->nb,
6122                                    glsl_get_vector_elements(value.glsl_type),
6123                                    glsl_get_bit_size(value.glsl_type),
6124                                    src0,
6125                                    .ray_query_value = value.nir_value,
6126                                    .committed = committed));
6127    }
6128 }
6129 
6130 static void
6131 vtn_handle_ray_query_intrinsic(struct vtn_builder *b, SpvOp opcode,
6132                                const uint32_t *w, unsigned count)
6133 {
6134    switch (opcode) {
6135    case SpvOpRayQueryInitializeKHR: {
6136       nir_intrinsic_instr *intrin =
6137          nir_intrinsic_instr_create(b->nb.shader,
6138                                     nir_intrinsic_rq_initialize);
6139       /* The sources are in the same order in the NIR intrinsic */
6140       for (unsigned i = 0; i < 8; i++)
6141          intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
6142       nir_builder_instr_insert(&b->nb, &intrin->instr);
6143       break;
6144    }
6145 
6146    case SpvOpRayQueryTerminateKHR:
6147       nir_rq_terminate(&b->nb, vtn_ssa_value(b, w[1])->def);
6148       break;
6149 
6150    case SpvOpRayQueryProceedKHR:
6151       vtn_push_nir_ssa(b, w[2],
6152                        nir_rq_proceed(&b->nb, 1, vtn_ssa_value(b, w[3])->def));
6153       break;
6154 
6155    case SpvOpRayQueryGenerateIntersectionKHR:
6156       nir_rq_generate_intersection(&b->nb,
6157                                    vtn_ssa_value(b, w[1])->def,
6158                                    vtn_ssa_value(b, w[2])->def);
6159       break;
6160 
6161    case SpvOpRayQueryConfirmIntersectionKHR:
6162       nir_rq_confirm_intersection(&b->nb, vtn_ssa_value(b, w[1])->def);
6163       break;
6164 
6165    case SpvOpRayQueryGetIntersectionTKHR:
6166    case SpvOpRayQueryGetIntersectionTypeKHR:
6167    case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6168    case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6169    case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6170    case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6171    case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6172    case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6173    case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6174    case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6175    case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6176    case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6177    case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6178    case SpvOpRayQueryGetIntersectionTriangleVertexPositionsKHR:
6179       ray_query_load_intrinsic_create(b, opcode, w,
6180                                       vtn_ssa_value(b, w[3])->def,
6181                                       vtn_constant_uint(b, w[4]));
6182       break;
6183 
6184    case SpvOpRayQueryGetRayTMinKHR:
6185    case SpvOpRayQueryGetRayFlagsKHR:
6186    case SpvOpRayQueryGetWorldRayDirectionKHR:
6187    case SpvOpRayQueryGetWorldRayOriginKHR:
6188    case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6189       ray_query_load_intrinsic_create(b, opcode, w,
6190                                       vtn_ssa_value(b, w[3])->def,
6191                                       /* Committed value is ignored for these */
6192                                       false);
6193       break;
6194 
6195    default:
6196       vtn_fail_with_opcode("Unhandled opcode", opcode);
6197    }
6198 }
6199 
6200 static void
6201 vtn_handle_initialize_node_payloads(struct vtn_builder *b, SpvOp opcode,
6202                                     const uint32_t *w, unsigned count)
6203 {
6204    vtn_assert(opcode == SpvOpInitializeNodePayloadsAMDX);
6205 
6206    nir_def *payloads = vtn_ssa_value(b, w[1])->def;
6207    mesa_scope scope = vtn_translate_scope(b, vtn_constant_uint(b, w[2]));
6208    nir_def *payload_count = vtn_ssa_value(b, w[3])->def;
6209    nir_def *node_index = vtn_ssa_value(b, w[4])->def;
6210 
6211    nir_initialize_node_payloads(&b->nb, payloads, payload_count, node_index, .execution_scope = scope);
6212 }
6213 
6214 static bool
6215 vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
6216                             const uint32_t *w, unsigned count)
6217 {
6218    switch (opcode) {
6219    case SpvOpLabel:
6220       break;
6221 
6222    case SpvOpLoopMerge:
6223    case SpvOpSelectionMerge:
6224       /* This is handled by cfg pre-pass and walk_blocks */
6225       break;
6226 
6227    case SpvOpUndef: {
6228       struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
6229       val->type = vtn_get_type(b, w[1]);
6230       break;
6231    }
6232 
6233    case SpvOpExtInst:
6234       vtn_handle_extension(b, opcode, w, count);
6235       break;
6236 
6237    case SpvOpVariable:
6238    case SpvOpLoad:
6239    case SpvOpStore:
6240    case SpvOpCopyMemory:
6241    case SpvOpCopyMemorySized:
6242    case SpvOpAccessChain:
6243    case SpvOpPtrAccessChain:
6244    case SpvOpInBoundsAccessChain:
6245    case SpvOpInBoundsPtrAccessChain:
6246    case SpvOpArrayLength:
6247    case SpvOpConvertPtrToU:
6248    case SpvOpConvertUToPtr:
6249    case SpvOpGenericCastToPtrExplicit:
6250    case SpvOpGenericPtrMemSemantics:
6251    case SpvOpSubgroupBlockReadINTEL:
6252    case SpvOpSubgroupBlockWriteINTEL:
6253    case SpvOpConvertUToAccelerationStructureKHR:
6254       vtn_handle_variables(b, opcode, w, count);
6255       break;
6256 
6257    case SpvOpFunctionCall:
6258       vtn_handle_function_call(b, opcode, w, count);
6259       break;
6260 
6261    case SpvOpSampledImage:
6262    case SpvOpImage:
6263    case SpvOpImageSparseTexelsResident:
6264    case SpvOpImageSampleImplicitLod:
6265    case SpvOpImageSparseSampleImplicitLod:
6266    case SpvOpImageSampleExplicitLod:
6267    case SpvOpImageSparseSampleExplicitLod:
6268    case SpvOpImageSampleDrefImplicitLod:
6269    case SpvOpImageSparseSampleDrefImplicitLod:
6270    case SpvOpImageSampleDrefExplicitLod:
6271    case SpvOpImageSparseSampleDrefExplicitLod:
6272    case SpvOpImageSampleProjImplicitLod:
6273    case SpvOpImageSampleProjExplicitLod:
6274    case SpvOpImageSampleProjDrefImplicitLod:
6275    case SpvOpImageSampleProjDrefExplicitLod:
6276    case SpvOpImageFetch:
6277    case SpvOpImageSparseFetch:
6278    case SpvOpImageGather:
6279    case SpvOpImageSparseGather:
6280    case SpvOpImageDrefGather:
6281    case SpvOpImageSparseDrefGather:
6282    case SpvOpImageQueryLod:
6283    case SpvOpImageQueryLevels:
6284       vtn_handle_texture(b, opcode, w, count);
6285       break;
6286 
6287    case SpvOpImageRead:
6288    case SpvOpImageSparseRead:
6289    case SpvOpImageWrite:
6290    case SpvOpImageTexelPointer:
6291    case SpvOpImageQueryFormat:
6292    case SpvOpImageQueryOrder:
6293       vtn_handle_image(b, opcode, w, count);
6294       break;
6295 
6296    case SpvOpImageQuerySamples:
6297    case SpvOpImageQuerySizeLod:
6298    case SpvOpImageQuerySize: {
6299       struct vtn_type *image_type = vtn_get_value_type(b, w[3]);
6300       vtn_assert(image_type->base_type == vtn_base_type_image);
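      /* These queries apply to both storage images and sampled images
       * (textures); pick the handler based on the underlying GLSL type.
       */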
6301       if (glsl_type_is_image(image_type->glsl_image)) {
6302          vtn_handle_image(b, opcode, w, count);
6303       } else {
6304          vtn_assert(glsl_type_is_texture(image_type->glsl_image));
6305          vtn_handle_texture(b, opcode, w, count);
6306       }
6307       break;
6308    }
6309 
6310    case SpvOpFragmentMaskFetchAMD:
6311    case SpvOpFragmentFetchAMD:
6312       vtn_handle_texture(b, opcode, w, count);
6313       break;
6314 
6315    case SpvOpAtomicLoad:
6316    case SpvOpAtomicExchange:
6317    case SpvOpAtomicCompareExchange:
6318    case SpvOpAtomicCompareExchangeWeak:
6319    case SpvOpAtomicIIncrement:
6320    case SpvOpAtomicIDecrement:
6321    case SpvOpAtomicIAdd:
6322    case SpvOpAtomicISub:
6323    case SpvOpAtomicSMin:
6324    case SpvOpAtomicUMin:
6325    case SpvOpAtomicSMax:
6326    case SpvOpAtomicUMax:
6327    case SpvOpAtomicAnd:
6328    case SpvOpAtomicOr:
6329    case SpvOpAtomicXor:
6330    case SpvOpAtomicFAddEXT:
6331    case SpvOpAtomicFMinEXT:
6332    case SpvOpAtomicFMaxEXT:
6333    case SpvOpAtomicFlagTestAndSet: {
6334       struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
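      /* Atomics on image texel pointers take the image path; atomics on
       * regular pointers are handled as memory atomics.
       */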
6335       if (pointer->value_type == vtn_value_type_image_pointer) {
6336          vtn_handle_image(b, opcode, w, count);
6337       } else {
6338          vtn_assert(pointer->value_type == vtn_value_type_pointer);
6339          vtn_handle_atomics(b, opcode, w, count);
6340       }
6341       break;
6342    }
6343 
6344    case SpvOpAtomicStore:
6345    case SpvOpAtomicFlagClear: {
6346       struct vtn_value *pointer = vtn_untyped_value(b, w[1]);
6347       if (pointer->value_type == vtn_value_type_image_pointer) {
6348          vtn_handle_image(b, opcode, w, count);
6349       } else {
6350          vtn_assert(pointer->value_type == vtn_value_type_pointer);
6351          vtn_handle_atomics(b, opcode, w, count);
6352       }
6353       break;
6354    }
6355 
6356    case SpvOpSelect:
6357       vtn_handle_select(b, opcode, w, count);
6358       break;
6359 
6360    case SpvOpSNegate:
6361    case SpvOpFNegate:
6362    case SpvOpNot:
6363    case SpvOpAny:
6364    case SpvOpAll:
6365    case SpvOpConvertFToU:
6366    case SpvOpConvertFToS:
6367    case SpvOpConvertSToF:
6368    case SpvOpConvertUToF:
6369    case SpvOpUConvert:
6370    case SpvOpSConvert:
6371    case SpvOpFConvert:
6372    case SpvOpQuantizeToF16:
6373    case SpvOpSatConvertSToU:
6374    case SpvOpSatConvertUToS:
6375    case SpvOpPtrCastToGeneric:
6376    case SpvOpGenericCastToPtr:
6377    case SpvOpIsNan:
6378    case SpvOpIsInf:
6379    case SpvOpIsFinite:
6380    case SpvOpIsNormal:
6381    case SpvOpSignBitSet:
6382    case SpvOpLessOrGreater:
6383    case SpvOpOrdered:
6384    case SpvOpUnordered:
6385    case SpvOpIAdd:
6386    case SpvOpFAdd:
6387    case SpvOpISub:
6388    case SpvOpFSub:
6389    case SpvOpIMul:
6390    case SpvOpFMul:
6391    case SpvOpUDiv:
6392    case SpvOpSDiv:
6393    case SpvOpFDiv:
6394    case SpvOpUMod:
6395    case SpvOpSRem:
6396    case SpvOpSMod:
6397    case SpvOpFRem:
6398    case SpvOpFMod:
6399    case SpvOpVectorTimesScalar:
6400    case SpvOpDot:
6401    case SpvOpIAddCarry:
6402    case SpvOpISubBorrow:
6403    case SpvOpUMulExtended:
6404    case SpvOpSMulExtended:
6405    case SpvOpShiftRightLogical:
6406    case SpvOpShiftRightArithmetic:
6407    case SpvOpShiftLeftLogical:
6408    case SpvOpLogicalEqual:
6409    case SpvOpLogicalNotEqual:
6410    case SpvOpLogicalOr:
6411    case SpvOpLogicalAnd:
6412    case SpvOpLogicalNot:
6413    case SpvOpBitwiseOr:
6414    case SpvOpBitwiseXor:
6415    case SpvOpBitwiseAnd:
6416    case SpvOpIEqual:
6417    case SpvOpFOrdEqual:
6418    case SpvOpFUnordEqual:
6419    case SpvOpINotEqual:
6420    case SpvOpFOrdNotEqual:
6421    case SpvOpFUnordNotEqual:
6422    case SpvOpULessThan:
6423    case SpvOpSLessThan:
6424    case SpvOpFOrdLessThan:
6425    case SpvOpFUnordLessThan:
6426    case SpvOpUGreaterThan:
6427    case SpvOpSGreaterThan:
6428    case SpvOpFOrdGreaterThan:
6429    case SpvOpFUnordGreaterThan:
6430    case SpvOpULessThanEqual:
6431    case SpvOpSLessThanEqual:
6432    case SpvOpFOrdLessThanEqual:
6433    case SpvOpFUnordLessThanEqual:
6434    case SpvOpUGreaterThanEqual:
6435    case SpvOpSGreaterThanEqual:
6436    case SpvOpFOrdGreaterThanEqual:
6437    case SpvOpFUnordGreaterThanEqual:
6438    case SpvOpDPdx:
6439    case SpvOpDPdy:
6440    case SpvOpFwidth:
6441    case SpvOpDPdxFine:
6442    case SpvOpDPdyFine:
6443    case SpvOpFwidthFine:
6444    case SpvOpDPdxCoarse:
6445    case SpvOpDPdyCoarse:
6446    case SpvOpFwidthCoarse:
6447    case SpvOpBitFieldInsert:
6448    case SpvOpBitFieldSExtract:
6449    case SpvOpBitFieldUExtract:
6450    case SpvOpBitReverse:
6451    case SpvOpBitCount:
6452    case SpvOpTranspose:
6453    case SpvOpOuterProduct:
6454    case SpvOpMatrixTimesScalar:
6455    case SpvOpVectorTimesMatrix:
6456    case SpvOpMatrixTimesVector:
6457    case SpvOpMatrixTimesMatrix:
6458    case SpvOpUCountLeadingZerosINTEL:
6459    case SpvOpUCountTrailingZerosINTEL:
6460    case SpvOpAbsISubINTEL:
6461    case SpvOpAbsUSubINTEL:
6462    case SpvOpIAddSatINTEL:
6463    case SpvOpUAddSatINTEL:
6464    case SpvOpIAverageINTEL:
6465    case SpvOpUAverageINTEL:
6466    case SpvOpIAverageRoundedINTEL:
6467    case SpvOpUAverageRoundedINTEL:
6468    case SpvOpISubSatINTEL:
6469    case SpvOpUSubSatINTEL:
6470    case SpvOpIMul32x16INTEL:
6471    case SpvOpUMul32x16INTEL:
6472       vtn_handle_alu(b, opcode, w, count);
6473       break;
6474 
6475    case SpvOpSDotKHR:
6476    case SpvOpUDotKHR:
6477    case SpvOpSUDotKHR:
6478    case SpvOpSDotAccSatKHR:
6479    case SpvOpUDotAccSatKHR:
6480    case SpvOpSUDotAccSatKHR:
6481       vtn_handle_integer_dot(b, opcode, w, count);
6482       break;
6483 
6484    case SpvOpBitcast:
6485       vtn_handle_bitcast(b, w, count);
6486       break;
6487 
6488    /* TODO: One day, we should probably do something with this information.
6489     * For now, though, it's safe to implement these as no-ops.
6490     * Needed for Rusticl SYCL support.
6491     */
6492    case SpvOpAssumeTrueKHR:
6493       break;
6494 
6495    case SpvOpExpectKHR:
6496    case SpvOpVectorExtractDynamic:
6497    case SpvOpVectorInsertDynamic:
6498    case SpvOpVectorShuffle:
6499    case SpvOpCompositeConstruct:
6500    case SpvOpCompositeExtract:
6501    case SpvOpCompositeInsert:
6502    case SpvOpCopyLogical:
6503    case SpvOpCopyObject:
6504       vtn_handle_composite(b, opcode, w, count);
6505       break;
6506 
6507    case SpvOpEmitVertex:
6508    case SpvOpEndPrimitive:
6509    case SpvOpEmitStreamVertex:
6510    case SpvOpEndStreamPrimitive:
6511    case SpvOpControlBarrier:
6512    case SpvOpMemoryBarrier:
6513       vtn_handle_barrier(b, opcode, w, count);
6514       break;
6515 
6516    case SpvOpGroupNonUniformElect:
6517    case SpvOpGroupNonUniformAll:
6518    case SpvOpGroupNonUniformAny:
6519    case SpvOpGroupNonUniformAllEqual:
6520    case SpvOpGroupNonUniformBroadcast:
6521    case SpvOpGroupNonUniformBroadcastFirst:
6522    case SpvOpGroupNonUniformBallot:
6523    case SpvOpGroupNonUniformInverseBallot:
6524    case SpvOpGroupNonUniformBallotBitExtract:
6525    case SpvOpGroupNonUniformBallotBitCount:
6526    case SpvOpGroupNonUniformBallotFindLSB:
6527    case SpvOpGroupNonUniformBallotFindMSB:
6528    case SpvOpGroupNonUniformShuffle:
6529    case SpvOpGroupNonUniformShuffleXor:
6530    case SpvOpGroupNonUniformShuffleUp:
6531    case SpvOpGroupNonUniformShuffleDown:
6532    case SpvOpGroupNonUniformIAdd:
6533    case SpvOpGroupNonUniformFAdd:
6534    case SpvOpGroupNonUniformIMul:
6535    case SpvOpGroupNonUniformFMul:
6536    case SpvOpGroupNonUniformSMin:
6537    case SpvOpGroupNonUniformUMin:
6538    case SpvOpGroupNonUniformFMin:
6539    case SpvOpGroupNonUniformSMax:
6540    case SpvOpGroupNonUniformUMax:
6541    case SpvOpGroupNonUniformFMax:
6542    case SpvOpGroupNonUniformBitwiseAnd:
6543    case SpvOpGroupNonUniformBitwiseOr:
6544    case SpvOpGroupNonUniformBitwiseXor:
6545    case SpvOpGroupNonUniformLogicalAnd:
6546    case SpvOpGroupNonUniformLogicalOr:
6547    case SpvOpGroupNonUniformLogicalXor:
6548    case SpvOpGroupNonUniformQuadBroadcast:
6549    case SpvOpGroupNonUniformQuadSwap:
6550    case SpvOpGroupNonUniformQuadAllKHR:
6551    case SpvOpGroupNonUniformQuadAnyKHR:
6552    case SpvOpGroupAll:
6553    case SpvOpGroupAny:
6554    case SpvOpGroupBroadcast:
6555    case SpvOpGroupIAdd:
6556    case SpvOpGroupFAdd:
6557    case SpvOpGroupFMin:
6558    case SpvOpGroupUMin:
6559    case SpvOpGroupSMin:
6560    case SpvOpGroupFMax:
6561    case SpvOpGroupUMax:
6562    case SpvOpGroupSMax:
6563    case SpvOpSubgroupBallotKHR:
6564    case SpvOpSubgroupFirstInvocationKHR:
6565    case SpvOpSubgroupReadInvocationKHR:
6566    case SpvOpSubgroupAllKHR:
6567    case SpvOpSubgroupAnyKHR:
6568    case SpvOpSubgroupAllEqualKHR:
6569    case SpvOpGroupIAddNonUniformAMD:
6570    case SpvOpGroupFAddNonUniformAMD:
6571    case SpvOpGroupFMinNonUniformAMD:
6572    case SpvOpGroupUMinNonUniformAMD:
6573    case SpvOpGroupSMinNonUniformAMD:
6574    case SpvOpGroupFMaxNonUniformAMD:
6575    case SpvOpGroupUMaxNonUniformAMD:
6576    case SpvOpGroupSMaxNonUniformAMD:
6577    case SpvOpSubgroupShuffleINTEL:
6578    case SpvOpSubgroupShuffleDownINTEL:
6579    case SpvOpSubgroupShuffleUpINTEL:
6580    case SpvOpSubgroupShuffleXorINTEL:
6581    case SpvOpGroupNonUniformRotateKHR:
6582       vtn_handle_subgroup(b, opcode, w, count);
6583       break;
6584 
6585    case SpvOpPtrDiff:
6586    case SpvOpPtrEqual:
6587    case SpvOpPtrNotEqual:
6588       vtn_handle_ptr(b, opcode, w, count);
6589       break;
6590 
6591    case SpvOpBeginInvocationInterlockEXT:
6592       nir_begin_invocation_interlock(&b->nb);
6593       break;
6594 
6595    case SpvOpEndInvocationInterlockEXT:
6596       nir_end_invocation_interlock(&b->nb);
6597       break;
6598 
6599    case SpvOpDemoteToHelperInvocation: {
6600       nir_demote(&b->nb);
6601       break;
6602    }
6603 
6604    case SpvOpIsHelperInvocationEXT: {
6605       vtn_push_nir_ssa(b, w[2], nir_is_helper_invocation(&b->nb, 1));
6606       break;
6607    }
6608 
6609    case SpvOpReadClockKHR: {
6610       SpvScope scope = vtn_constant_uint(b, w[3]);
6611       vtn_fail_if(scope != SpvScopeDevice && scope != SpvScopeSubgroup,
6612                   "OpReadClockKHR Scope must be either "
6613                   "ScopeDevice or ScopeSubgroup.");
6614 
6615       /* Operation supports two result types: uvec2 and uint64_t.  The NIR
6616        * intrinsic gives uvec2, so pack the result for the other case.
6617        */
6618       nir_def *result = nir_shader_clock(&b->nb, vtn_translate_scope(b, scope));
6619 
6620       struct vtn_type *type = vtn_get_type(b, w[1]);
6621       const struct glsl_type *dest_type = type->type;
6622 
6623       if (glsl_type_is_vector(dest_type)) {
6624          assert(dest_type == glsl_vector_type(GLSL_TYPE_UINT, 2));
6625       } else {
6626          assert(glsl_type_is_scalar(dest_type));
6627          assert(glsl_get_base_type(dest_type) == GLSL_TYPE_UINT64);
6628          result = nir_pack_64_2x32(&b->nb, result);
6629       }
6630 
6631       vtn_push_nir_ssa(b, w[2], result);
6632       break;
6633    }
6634 
6635    case SpvOpTraceNV:
6636    case SpvOpTraceRayKHR:
6637    case SpvOpReportIntersectionKHR:
6638    case SpvOpIgnoreIntersectionNV:
6639    case SpvOpTerminateRayNV:
6640    case SpvOpExecuteCallableNV:
6641    case SpvOpExecuteCallableKHR:
6642       vtn_handle_ray_intrinsic(b, opcode, w, count);
6643       break;
6644 
6645    case SpvOpRayQueryInitializeKHR:
6646    case SpvOpRayQueryTerminateKHR:
6647    case SpvOpRayQueryGenerateIntersectionKHR:
6648    case SpvOpRayQueryConfirmIntersectionKHR:
6649    case SpvOpRayQueryProceedKHR:
6650    case SpvOpRayQueryGetIntersectionTypeKHR:
6651    case SpvOpRayQueryGetRayTMinKHR:
6652    case SpvOpRayQueryGetRayFlagsKHR:
6653    case SpvOpRayQueryGetIntersectionTKHR:
6654    case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6655    case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6656    case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6657    case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6658    case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6659    case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6660    case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6661    case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6662    case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6663    case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6664    case SpvOpRayQueryGetWorldRayDirectionKHR:
6665    case SpvOpRayQueryGetWorldRayOriginKHR:
6666    case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6667    case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6668    case SpvOpRayQueryGetIntersectionTriangleVertexPositionsKHR:
6669       vtn_handle_ray_query_intrinsic(b, opcode, w, count);
6670       break;
6671 
6672    case SpvOpLifetimeStart:
6673    case SpvOpLifetimeStop:
6674       break;
6675 
6676    case SpvOpGroupAsyncCopy:
6677    case SpvOpGroupWaitEvents:
6678       vtn_handle_opencl_core_instruction(b, opcode, w, count);
6679       break;
6680 
6681    case SpvOpWritePackedPrimitiveIndices4x8NV:
6682       vtn_handle_write_packed_primitive_indices(b, opcode, w, count);
6683       break;
6684 
6685    case SpvOpSetMeshOutputsEXT:
6686       nir_set_vertex_and_primitive_count(
6687          &b->nb, vtn_get_nir_ssa(b, w[1]), vtn_get_nir_ssa(b, w[2]),
6688          nir_undef(&b->nb, 1, 32));
6689       break;
6690 
6691    case SpvOpInitializeNodePayloadsAMDX:
6692       vtn_handle_initialize_node_payloads(b, opcode, w, count);
6693       break;
6694 
6695    case SpvOpFinalizeNodePayloadsAMDX:
6696       break;
6697 
6698    case SpvOpFinishWritingNodePayloadAMDX:
6699       break;
6700 
6701    case SpvOpCooperativeMatrixLoadKHR:
6702    case SpvOpCooperativeMatrixStoreKHR:
6703    case SpvOpCooperativeMatrixLengthKHR:
6704    case SpvOpCooperativeMatrixMulAddKHR:
6705       vtn_handle_cooperative_instruction(b, opcode, w, count);
6706       break;
6707 
6708    default:
6709       vtn_fail_with_opcode("Unhandled opcode", opcode);
6710    }
6711 
6712    return true;
6713 }
6714 
6715 static bool
6716 is_glslang(const struct vtn_builder *b)
6717 {
6718    return b->generator_id == vtn_generator_glslang_reference_front_end ||
6719           b->generator_id == vtn_generator_shaderc_over_glslang;
6720 }
6721 
6722 struct vtn_builder*
6723 vtn_create_builder(const uint32_t *words, size_t word_count,
6724                    gl_shader_stage stage, const char *entry_point_name,
6725                    const struct spirv_to_nir_options *options)
6726 {
6727    /* Initialize the vtn_builder object */
6728    struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
6729 
6730    b->spirv = words;
6731    b->spirv_word_count = word_count;
6732    b->file = NULL;
6733    b->line = -1;
6734    b->col = -1;
6735    list_inithead(&b->functions);
6736    b->entry_point_stage = stage;
6737    b->entry_point_name = entry_point_name;
6738 
6739    /*
6740     * Handle the SPIR-V header (first 5 dwords).
6741     * Can't use vtn_assert() as the setjmp(3) target isn't initialized yet.
6742     */
6743    if (word_count <= 5)
6744       goto fail;
6745 
6746    if (words[0] != SpvMagicNumber) {
6747       vtn_err("words[0] was 0x%x, want 0x%x", words[0], SpvMagicNumber);
6748       goto fail;
6749    }
6750 
6751    b->version = words[1];
6752    if (b->version < 0x10000) {
6753       vtn_err("version was 0x%x, want >= 0x10000", b->version);
6754       goto fail;
6755    }
6756 
6757    b->generator_id = words[2] >> 16;
6758    uint16_t generator_version = words[2];
6759 
6760    unsigned value_id_bound = words[3];
6761    if (words[4] != 0) {
6762       vtn_err("words[4] was %u, want 0", words[4]);
6763       goto fail;
6764    }
6765 
6766    b->value_id_bound = value_id_bound;
6767 
6768    /* Allocate all the data that can be dropped after parsing using
6769     * a cheaper allocation strategy.  Use the value_id_bound and the
6770     * size of the common internal structs to approximate a good
6771     * buffer_size.
6772     */
6773    const linear_opts lin_opts = {
6774       .min_buffer_size = 2 * value_id_bound * (sizeof(struct vtn_value) +
6775                                                sizeof(struct vtn_ssa_value)),
6776    };
6777    b->lin_ctx = linear_context_with_opts(b, &lin_opts);
6778 
6779    struct spirv_to_nir_options *dup_options =
6780       vtn_alloc(b, struct spirv_to_nir_options);
6781    *dup_options = *options;
6782 
6783    b->options = dup_options;
6784    b->values = vtn_zalloc_array(b, struct vtn_value, value_id_bound);
6785 
6787    /* In GLSLang commit 8297936dd6eb3, their handling of barrier() was fixed
6788     * to provide correct memory semantics on compute shader barrier()
6789     * commands.  Prior to that, we need to fix them up ourselves.  This
6790     * GLSLang fix caused them to bump to generator version 3.
6791     */
6792    b->wa_glslang_cs_barrier = is_glslang(b) && generator_version < 3;
6793 
6794    /* Identifying the LLVM-SPIRV translator:
6795     *
6796     * The LLVM-SPIRV translator currently doesn't store any generator ID [1].
6797     * Our use case involving the SPIRV-Tools linker also means we want to check
6798     * for that tool instead. Finally, the SPIRV-Tools linker stores its
6799     * generator ID in the wrong location [2].
6800     *
6801     * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/1223
6802     * [2] : https://github.com/KhronosGroup/SPIRV-Tools/pull/4549
6803     */
6804    const bool is_llvm_spirv_translator =
6805       (b->generator_id == 0 &&
6806        generator_version == vtn_generator_spirv_tools_linker) ||
6807       b->generator_id == vtn_generator_spirv_tools_linker;
6808 
6809    /* The LLVM-SPIRV translator generates Undef initializers for __local
6810     * variables [1].
6811     *
6812     * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1224
6813     */
6814    b->wa_llvm_spirv_ignore_workgroup_initializer =
6815       b->options->environment == NIR_SPIRV_OPENCL && is_llvm_spirv_translator;
6816 
6817    /* Older versions of GLSLang would incorrectly emit OpReturn after
6818     * OpEmitMeshTasksEXT, even though the latter is already a terminator
6819     * instruction.
6820     *
6821     * See https://github.com/KhronosGroup/glslang/issues/3020 for details.
6822     *
6823     * Clay Shader Compiler (used by GravityMark) is also affected.
6824     */
6825    b->wa_ignore_return_after_emit_mesh_tasks =
6826       (is_glslang(b) && generator_version < 11) ||
6827       (b->generator_id == vtn_generator_clay_shader_compiler &&
6828        generator_version < 18);
6829 
6830    if (b->options->environment == NIR_SPIRV_VULKAN && b->version < 0x10400)
6831       b->vars_used_indirectly = _mesa_pointer_set_create(b);
6832 
6833    if (b->options->environment == NIR_SPIRV_VULKAN)
6834       b->vars_used_indirectly = _mesa_pointer_set_create(b);
6835 
6836    return b;
6837  fail:
6838    ralloc_free(b);
6839    return NULL;
6840 }
6841 
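/* Wrap a kernel entry point in a function that declares a uniform input
 * variable per kernel argument, loads those variables, and calls the real
 * entry point with the results.  As an illustrative sketch (for a
 * hypothetical kernel "void foo(int x)"), the generated NIR behaves like:
 *
 *    void __wrapped_foo() {
 *       foo(load x_in);
 *    }
 */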
static nir_function *
vtn_emit_kernel_entry_point_wrapper(struct vtn_builder *b,
                                    nir_function *entry_point)
{
   vtn_assert(entry_point == b->entry_point->func->nir_func);
   vtn_fail_if(!entry_point->name, "entry points are required to have a name");
   const char *func_name =
      ralloc_asprintf(b->shader, "__wrapped_%s", entry_point->name);

   vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);

   nir_function *main_entry_point = nir_function_create(b->shader, func_name);
   nir_function_impl *impl = nir_function_impl_create(main_entry_point);
   b->nb = nir_builder_at(nir_after_impl(impl));
   b->func_param_idx = 0;

   nir_call_instr *call = nir_call_instr_create(b->nb.shader, entry_point);

   for (unsigned i = 0; i < entry_point->num_params; ++i) {
      struct vtn_type *param_type = b->entry_point->func->type->params[i];

      b->shader->info.cs.has_variable_shared_mem |=
         param_type->storage_class == SpvStorageClassWorkgroup;

      /* consider all pointers to function memory to be parameters passed
       * by value
       */
      bool is_by_val = param_type->base_type == vtn_base_type_pointer &&
         param_type->storage_class == SpvStorageClassFunction;

      /* input variable */
      nir_variable *in_var = rzalloc(b->nb.shader, nir_variable);

      if (is_by_val) {
         in_var->data.mode = nir_var_uniform;
         in_var->type = param_type->deref->type;
      } else if (param_type->base_type == vtn_base_type_image) {
         in_var->data.mode = nir_var_image;
         in_var->type = param_type->glsl_image;
         in_var->data.access =
            spirv_to_gl_access_qualifier(b, param_type->access_qualifier);
      } else if (param_type->base_type == vtn_base_type_sampler) {
         in_var->data.mode = nir_var_uniform;
         in_var->type = glsl_bare_sampler_type();
      } else {
         in_var->data.mode = nir_var_uniform;
         in_var->type = param_type->type;
      }

      in_var->data.read_only = true;
      in_var->data.location = i;

      nir_shader_add_variable(b->nb.shader, in_var);

      /* we have to copy the entire variable into function memory */
      if (is_by_val) {
         nir_variable *copy_var =
            nir_local_variable_create(impl, in_var->type, "copy_in");
         nir_copy_var(&b->nb, copy_var, in_var);
         call->params[i] =
            nir_src_for_ssa(&nir_build_deref_var(&b->nb, copy_var)->def);
      } else if (param_type->base_type == vtn_base_type_image ||
                 param_type->base_type == vtn_base_type_sampler) {
         /* Don't load the var, just pass a deref of it */
         call->params[i] = nir_src_for_ssa(&nir_build_deref_var(&b->nb, in_var)->def);
      } else {
         call->params[i] = nir_src_for_ssa(nir_load_var(&b->nb, in_var));
      }
   }

   nir_builder_instr_insert(&b->nb, &call->instr);

   return main_entry_point;
}

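/* Callback for nir_remove_dead_variables() below: a variable may be removed
 * unless it was recorded in b->vars_used_indirectly as part of the entry
 * point's interface.
 */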
static bool
can_remove(nir_variable *var, void *data)
{
   const struct set *vars_used_indirectly = data;
   return !_mesa_set_search(vars_used_indirectly, var);
}

#ifndef NDEBUG
static void
initialize_mesa_spirv_debug(void)
{
   mesa_spirv_debug = debug_get_option_mesa_spirv_debug();
}
#endif

nir_shader *
spirv_to_nir(const uint32_t *words, size_t word_count,
             struct nir_spirv_specialization *spec, unsigned num_spec,
             gl_shader_stage stage, const char *entry_point_name,
             const struct spirv_to_nir_options *options,
             const nir_shader_compiler_options *nir_options)
{
#ifndef NDEBUG
   static once_flag initialized_debug_flag = ONCE_FLAG_INIT;
   call_once(&initialized_debug_flag, initialize_mesa_spirv_debug);
#endif

   const uint32_t *word_end = words + word_count;

   struct vtn_builder *b = vtn_create_builder(words, word_count,
                                              stage, entry_point_name,
                                              options);

   if (b == NULL)
      return NULL;

   /* See also _vtn_fail() */
   if (vtn_setjmp(b->fail_jump)) {
      ralloc_free(b);
      return NULL;
   }

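   /* For debugging: running with e.g. MESA_SPIRV_DUMP_PATH=/tmp in the
    * environment writes every module that reaches spirv_to_nir() out under
    * that directory (the exact file naming is up to vtn_dump_shader), ready
    * to be inspected with spirv-dis.
    */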
   const char *dump_path = secure_getenv("MESA_SPIRV_DUMP_PATH");
   if (dump_path)
      vtn_dump_shader(b, dump_path, "spirv");

   b->shader = nir_shader_create(b, stage, nir_options, NULL);
   b->shader->info.subgroup_size = options->subgroup_size;
   b->shader->info.float_controls_execution_mode = options->float_controls_execution_mode;
   b->shader->info.cs.shader_index = options->shader_index;
   _mesa_sha1_compute(words, word_count * sizeof(uint32_t), b->shader->info.source_sha1);

   /* Skip the SPIR-V header, handled at vtn_create_builder */
   words += 5;

   /* Handle all the preamble instructions */
   words = vtn_foreach_instruction(b, words, word_end,
                                   vtn_handle_preamble_instruction);

   /* DirectXShaderCompiler and glslang/shaderc both create OpKill from HLSL's
    * discard/clip, which uses demote semantics. DirectXShaderCompiler will use
    * demote if the extension is enabled, so we disable this workaround in that
    * case.
    *
    * Related glslang issue: https://github.com/KhronosGroup/glslang/issues/2416
    */
   bool dxsc = b->generator_id == vtn_generator_spiregg;
   b->convert_discard_to_demote = ((dxsc && !b->uses_demote_to_helper_invocation) ||
                                   (is_glslang(b) && b->source_lang == SpvSourceLanguageHLSL)) &&
                                  options->caps.demote_to_helper_invocation;
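   /* In other words, when the workaround applies, each OpKill in the module
    * is handled as if it were OpDemoteToHelperInvocation: the invocation is
    * demoted (it keeps executing so derivatives stay defined) rather than
    * terminated.
    */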

   if (!options->create_library && b->entry_point == NULL) {
      vtn_fail("Entry point not found for %s shader \"%s\"",
               _mesa_shader_stage_to_string(stage), entry_point_name);
      ralloc_free(b);
      return NULL;
   }

   /* Ensure a sane address mode is being used for function temps */
   assert(nir_address_format_bit_size(b->options->temp_addr_format) == nir_get_ptr_bitsize(b->shader));
   assert(nir_address_format_num_components(b->options->temp_addr_format) == 1);

   /* Set shader info defaults */
   if (stage == MESA_SHADER_GEOMETRY)
      b->shader->info.gs.invocations = 1;

   /* Parse execution modes. */
   if (!options->create_library)
      vtn_foreach_execution_mode(b, b->entry_point,
                                 vtn_handle_execution_mode, NULL);

   b->specializations = spec;
   b->num_specializations = num_spec;

   /* Handle all variable, type, and constant instructions */
   words = vtn_foreach_instruction(b, words, word_end,
                                   vtn_handle_variable_or_type_instruction);

   /* Parse execution modes that depend on IDs. Must happen after we have
    * constants parsed.
    */
   if (!options->create_library)
      vtn_foreach_execution_mode(b, b->entry_point,
                                 vtn_handle_execution_mode_id, NULL);

   if (b->workgroup_size_builtin) {
      vtn_assert(gl_shader_stage_uses_workgroup(stage));
      vtn_assert(b->workgroup_size_builtin->type->type ==
                 glsl_vector_type(GLSL_TYPE_UINT, 3));

      nir_const_value *const_size =
         b->workgroup_size_builtin->constant->values;

      b->shader->info.workgroup_size[0] = const_size[0].u32;
      b->shader->info.workgroup_size[1] = const_size[1].u32;
      b->shader->info.workgroup_size[2] = const_size[2].u32;
   }

   /* Set types on all vtn_values */
   vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);

   vtn_build_cfg(b, words, word_end);

   if (!options->create_library) {
      assert(b->entry_point->value_type == vtn_value_type_function);
      b->entry_point->func->referenced = true;
   }

   bool progress;
   do {
      progress = false;
      vtn_foreach_function(func, &b->functions) {
         if ((options->create_library || func->referenced) && !func->emitted) {
            vtn_function_emit(b, func, vtn_handle_body_instruction);
            progress = true;
         }
      }
   } while (progress);

   if (!options->create_library) {
      vtn_assert(b->entry_point->value_type == vtn_value_type_function);
      nir_function *entry_point = b->entry_point->func->nir_func;
      vtn_assert(entry_point);

      entry_point->dont_inline = false;
      /* post process entry_points with input params */
      if (entry_point->num_params && b->shader->info.stage == MESA_SHADER_KERNEL)
         entry_point = vtn_emit_kernel_entry_point_wrapper(b, entry_point);

      entry_point->is_entrypoint = true;
   }

   /* structurize the CFG */
   nir_lower_goto_ifs(b->shader);

   nir_validate_shader(b->shader, "after spirv cfg");

   nir_lower_continue_constructs(b->shader);

   /* A SPIR-V module can have multiple shader stages and also multiple
    * shaders of the same stage.  Global variables are declared per-module.
    *
    * Starting in SPIR-V 1.4 the list of global variables is part of
    * OpEntryPoint, so only valid ones will be created.  Previous versions
    * only have Input and Output variables listed, so remove dead variables to
    * clean up the remaining ones.
    */
   if (!options->create_library && b->version < 0x10400) {
      const nir_remove_dead_variables_options dead_opts = {
         .can_remove_var = can_remove,
         .can_remove_var_data = b->vars_used_indirectly,
      };
      nir_remove_dead_variables(b->shader, ~(nir_var_function_temp |
                                             nir_var_shader_out |
                                             nir_var_shader_in |
                                             nir_var_system_value),
                                b->vars_used_indirectly ? &dead_opts : NULL);
   }

   nir_foreach_variable_in_shader(var, b->shader) {
      switch (var->data.mode) {
      case nir_var_mem_ubo:
         b->shader->info.num_ubos++;
         break;
      case nir_var_mem_ssbo:
         b->shader->info.num_ssbos++;
         break;
      case nir_var_mem_push_const:
         vtn_assert(b->shader->num_uniforms == 0);
         b->shader->num_uniforms =
            glsl_get_explicit_size(glsl_without_array(var->type), false);
         break;
      default:
         break;
      }
   }

   /* We sometimes generate bogus derefs that, while never used, give the
    * validator a bit of heartburn.  Run dead code to get rid of them.
    */
   nir_opt_dce(b->shader);

   /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
    * a Block, all of them will be and Blocks are explicitly laid out.
    */
   nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
      if (glsl_type_is_interface(var->type)) {
         assert(b->options->caps.workgroup_memory_explicit_layout);
         b->shader->info.shared_memory_explicit_layout = true;
         break;
      }
   }
   if (b->shader->info.shared_memory_explicit_layout) {
      unsigned size = 0;
      nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
         assert(glsl_type_is_interface(var->type));
         const bool align_to_stride = false;
         size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
      }
      b->shader->info.shared_size = size;
   }

   if (stage == MESA_SHADER_FRAGMENT) {
      /* From the Vulkan 1.2.199 spec:
       *
       *    "If a fragment shader entry point’s interface includes an input
       *    variable decorated with SamplePosition, Sample Shading is
       *    considered enabled with a minSampleShading value of 1.0."
       *
       * Similar text exists for SampleId.  Regarding the Sample decoration,
       * the Vulkan 1.2.199 spec says:
       *
       *    "If a fragment shader input is decorated with Sample, a separate
       *    value must be assigned to that variable for each covered sample in
       *    the fragment, and that value must be sampled at the location of
       *    the individual sample. When rasterizationSamples is
       *    VK_SAMPLE_COUNT_1_BIT, the fragment center must be used for
       *    Centroid, Sample, and undecorated attribute interpolation."
       *
       * Unfortunately, this isn't quite as clear about static use and the
       * interface but the static use check should be valid.
       *
       * For OpenGL, similar language exists but it's all more wishy-washy.
       * We'll assume the same behavior across APIs.
       */
      nir_foreach_variable_with_modes(var, b->shader,
                                      nir_var_shader_in |
                                      nir_var_system_value) {
         struct nir_variable_data *members =
            var->members ? var->members : &var->data;
         uint16_t num_members = var->members ? var->num_members : 1;
         for (uint16_t i = 0; i < num_members; i++) {
            if (members[i].mode == nir_var_system_value &&
                (members[i].location == SYSTEM_VALUE_SAMPLE_ID ||
                 members[i].location == SYSTEM_VALUE_SAMPLE_POS))
               b->shader->info.fs.uses_sample_shading = true;

            if (members[i].mode == nir_var_shader_in && members[i].sample)
               b->shader->info.fs.uses_sample_shading = true;
         }
      }
   }

   /* Work around applications that declare shader_call_data variables inside
    * ray generation shaders.
    *
    * https://gitlab.freedesktop.org/mesa/mesa/-/issues/5326
    */
   if (stage == MESA_SHADER_RAYGEN)
      NIR_PASS(_, b->shader, nir_remove_dead_variables, nir_var_shader_call_data,
               NULL);

   /* Unparent the shader from the vtn_builder before we delete the builder */
   ralloc_steal(NULL, b->shader);

   nir_shader *shader = b->shader;
   ralloc_free(b);

   return shader;
}

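/* Print, to fp, a static-inline nir_builder wrapper for one exported
 * function.  For a hypothetical exported "float add(float, float)" the
 * generated code looks roughly like (abbreviated):
 *
 *    static inline nir_def *
 *    add(nir_builder *b, nir_def *arg1, nir_def *arg2)
 *    {
 *       assert(arg1->bit_size == 32);
 *       ...
 *       nir_function *func =
 *          nir_shader_get_function_for_name(b->shader, "add");
 *       if (!func) {
 *          ...create the prototype...
 *       }
 *       ...create a local "return" variable and a deref to it...
 *       nir_call(b, func, &deref->def, arg1, arg2);
 *       return nir_load_deref(b, deref);
 *    }
 */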
static bool
func_to_nir_builder(FILE *fp, struct vtn_function *func)
{
   nir_function *nir_func = func->nir_func;
   struct vtn_type *return_type = func->type->return_type;
   bool returns = return_type->base_type != vtn_base_type_void;

   if (returns && return_type->base_type != vtn_base_type_scalar &&
                  return_type->base_type != vtn_base_type_vector) {
      fprintf(stderr, "Unsupported return type for %s\n", nir_func->name);
      return false;
   }

   /* If there is a return type, the first NIR parameter is the return deref,
    * so offset by that for logical parameter iteration.
    */
   unsigned first_param = returns ? 1 : 0;

   /* Generate function signature */
   fprintf(fp, "static inline %s\n", returns ? "nir_def *" : "void");
   fprintf(fp, "%s(nir_builder *b", nir_func->name);

   /* TODO: Can we recover parameter names? */
   for (unsigned i = first_param; i < nir_func->num_params; ++i) {
      fprintf(fp, ", nir_def *arg%u", i);
   }

   fprintf(fp, ")\n{\n");

   /* Validate inputs. nir_validate will do this too, but the
    * errors/backtraces from these asserts should be nicer.
    */
   for (unsigned i = first_param; i < nir_func->num_params; ++i) {
      nir_parameter *param = &nir_func->params[i];
      fprintf(fp, "   assert(arg%u->bit_size == %u);\n", i, param->bit_size);
      fprintf(fp, "   assert(arg%u->num_components == %u);\n", i,
              param->num_components);
      fprintf(fp, "\n");
   }

   /* Find the function to call. If not found, create a prototype */
   fprintf(fp, "   nir_function *func = nir_shader_get_function_for_name(b->shader, \"%s\");\n",
           nir_func->name);
   fprintf(fp, "\n");
   fprintf(fp, "   if (!func) {\n");
   fprintf(fp, "      func = nir_function_create(b->shader, \"%s\");\n",
           nir_func->name);
   fprintf(fp, "      func->num_params = %u;\n", nir_func->num_params);
   fprintf(fp, "      func->params = ralloc_array(b->shader, nir_parameter, func->num_params);\n");

   for (unsigned i = 0; i < nir_func->num_params; ++i) {
      fprintf(fp, "\n");
      fprintf(fp, "      func->params[%u].bit_size = %u;\n", i,
              nir_func->params[i].bit_size);
      fprintf(fp, "      func->params[%u].num_components = %u;\n", i,
              nir_func->params[i].num_components);
   }

   fprintf(fp, "   }\n\n");

   if (returns) {
      /* We assume that vec3 variables are lowered to vec4. Mirror that here so
       * we don't need to lower vec3 to vec4 again at link-time.
       */
      assert(glsl_type_is_vector_or_scalar(return_type->type));
      unsigned elements = return_type->type->vector_elements;
      if (elements == 3)
         elements = 4;

      /* Reconstruct the return type. */
      fprintf(fp, "   const struct glsl_type *ret_type = glsl_vector_type(%u, %u);\n",
              return_type->type->base_type, elements);

      /* With the type, we can make a variable and get a deref to pass in */
      fprintf(fp, "   nir_variable *ret = nir_local_variable_create(b->impl, ret_type, \"return\");\n");
      fprintf(fp, "   nir_deref_instr *deref = nir_build_deref_var(b, ret);\n");

      /* XXX: This is a hack due to ptr size differing between KERNEL and other
       * shader stages. This needs to be fixed in core NIR.
       */
      fprintf(fp, "   deref->def.bit_size = %u;\n", nir_func->params[0].bit_size);
      fprintf(fp, "\n");
   }

   /* Call the function */
   fprintf(fp, "   nir_call(b, func");

   if (returns)
      fprintf(fp, ", &deref->def");

   for (unsigned i = first_param; i < nir_func->num_params; ++i)
      fprintf(fp, ", arg%u", i);

   fprintf(fp, ");\n");

   /* Load the return value if any, undoing the vec3->vec4 lowering. */
   if (returns) {
      fprintf(fp, "\n");

      if (return_type->type->vector_elements == 3)
         fprintf(fp, "   return nir_trim_vector(b, nir_load_deref(b, deref), 3);\n");
      else
         fprintf(fp, "   return nir_load_deref(b, deref);\n");
   }

   fprintf(fp, "}\n\n");
   return true;
}

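/* Parse a SPIR-V library and print nir_builder helpers for every function it
 * exports (SpvLinkageTypeExport) to the given FILE.  Meant to run at build
 * time; a hypothetical usage sketch:
 *
 *    FILE *fp = fopen("libcl_builders.h", "w");
 *    if (!spirv_library_to_nir_builder(fp, words, word_count, &opts))
 *       ...handle the error...
 *    fclose(fp);
 */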
bool
spirv_library_to_nir_builder(FILE *fp, const uint32_t *words, size_t word_count,
                             const struct spirv_to_nir_options *options)
{
#ifndef NDEBUG
   static once_flag initialized_debug_flag = ONCE_FLAG_INIT;
   call_once(&initialized_debug_flag, initialize_mesa_spirv_debug);
#endif

   const uint32_t *word_end = words + word_count;

   struct vtn_builder *b = vtn_create_builder(words, word_count,
                                              MESA_SHADER_KERNEL, "placeholder name",
                                              options);

   if (b == NULL)
      return false;

   /* See also _vtn_fail() */
   if (vtn_setjmp(b->fail_jump)) {
      ralloc_free(b);
      return false;
   }

   b->shader = nir_shader_create(b, MESA_SHADER_KERNEL,
                                 &(const nir_shader_compiler_options){0}, NULL);

   /* Skip the SPIR-V header, handled at vtn_create_builder */
   words += 5;

   /* Handle all the preamble instructions */
   words = vtn_foreach_instruction(b, words, word_end,
                                   vtn_handle_preamble_instruction);

   /* Handle all variable, type, and constant instructions */
   words = vtn_foreach_instruction(b, words, word_end,
                                   vtn_handle_variable_or_type_instruction);

   /* Set types on all vtn_values */
   vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);

   vtn_build_cfg(b, words, word_end);

   fprintf(fp, "#include \"compiler/nir/nir_builder.h\"\n\n");

   vtn_foreach_function(func, &b->functions) {
      if (func->linkage != SpvLinkageTypeExport)
         continue;

      if (!func_to_nir_builder(fp, func))
         return false;
   }

   ralloc_free(b);
   return true;
}