1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Faith Ekstrand (faith@gfxstrand.net)
25  *
26  */
27 
28 #include "glsl_types.h"
29 #include "vtn_private.h"
30 #include "nir/nir_vla.h"
31 #include "nir/nir_control_flow.h"
32 #include "nir/nir_constant_expressions.h"
33 #include "nir/nir_deref.h"
34 #include "spirv_info.h"
35 
36 #include "util/format/u_format.h"
37 #include "util/u_math.h"
38 #include "util/u_string.h"
39 #include "util/u_debug.h"
40 #include "util/mesa-blake3.h"
41 
42 #include <stdio.h>
43 
44 /* Table of all implemented capabilities.  These are the capabilities that are
45  * implemented in spirv_to_nir, not what the device supports.
46  *
47  * This list should remain alphabetized.  For the purposes of alphabetization,
48  * suffixes do not exist and 8 comes before 16.
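 * (For example, Int8 sorts before Int16, and AtomicFloat16AddEXT sorts as if
 * it were AtomicFloat16Add.)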
49  */
50 static const struct spirv_capabilities implemented_capabilities = {
51    .Addresses = true,
52    .AtomicFloat16AddEXT = true,
53    .AtomicFloat32AddEXT = true,
54    .AtomicFloat64AddEXT = true,
55    .AtomicFloat16MinMaxEXT = true,
56    .AtomicFloat32MinMaxEXT = true,
57    .AtomicFloat64MinMaxEXT = true,
58    .AtomicStorage = true,
59    .ClipDistance = true,
60    .ComputeDerivativeGroupLinearKHR = true,
61    .ComputeDerivativeGroupQuadsKHR = true,
62    .CooperativeMatrixKHR = true,
63    .CullDistance = true,
64    .DemoteToHelperInvocation = true,
65    .DenormFlushToZero = true,
66    .DenormPreserve = true,
67    .DerivativeControl = true,
68    .DeviceGroup = true,
69    .DotProduct = true,
70    .DotProductInput4x8Bit = true,
71    .DotProductInput4x8BitPacked = true,
72    .DotProductInputAll = true,
73    .DrawParameters = true,
74    .ExpectAssumeKHR = true,
75    .Float16 = true,
76    .Float16Buffer = true,
77    .Float64 = true,
78    .FloatControls2 = true,
79    .FragmentBarycentricKHR = true,
80    .FragmentDensityEXT = true,
81    .FragmentFullyCoveredEXT = true,
82    .FragmentMaskAMD = true,
83    .FragmentShaderPixelInterlockEXT = true,
84    .FragmentShaderSampleInterlockEXT = true,
85    .FragmentShadingRateKHR = true,
86    .GenericPointer = true,
87    .Geometry = true,
88    .GeometryPointSize = true,
89    .GeometryStreams = true,
90    .GroupNonUniform = true,
91    .GroupNonUniformArithmetic = true,
92    .GroupNonUniformBallot = true,
93    .GroupNonUniformClustered = true,
94    .GroupNonUniformQuad = true,
95    .GroupNonUniformRotateKHR = true,
96    .GroupNonUniformShuffle = true,
97    .GroupNonUniformShuffleRelative = true,
98    .GroupNonUniformVote = true,
99    .Groups = true,
100    .Image1D = true,
101    .ImageBasic = true,
102    .ImageBuffer = true,
103    .ImageCubeArray = true,
104    .ImageGatherBiasLodAMD = true,
105    .ImageGatherExtended = true,
106    .ImageMipmap = true,
107    .ImageMSArray = true,
108    .ImageQuery = true,
109    .ImageReadWrite = true,
110    .ImageReadWriteLodAMD = true,
111    .ImageRect = true,
112    .InputAttachment = true,
113    .InputAttachmentArrayDynamicIndexingEXT = true,
114    .InputAttachmentArrayNonUniformIndexingEXT = true,
115    .Int8 = true,
116    .Int16 = true,
117    .Int64 = true,
118    .Int64Atomics = true,
119    .Int64ImageEXT = true,
120    .IntegerFunctions2INTEL = true,
121    .InterpolationFunction = true,
122    .Kernel = true,
123    .Linkage = true,
124    .LiteralSampler = true,
125    .Matrix = true,
126    .MeshShadingEXT = true,
127    .MeshShadingNV = true,
128    .MinLod = true,
129    .MultiView = true,
130    .MultiViewport = true,
131    .OptNoneINTEL = true, // FIXME: make codegen emit the EXT name
132    .PerViewAttributesNV = true,
133    .PhysicalStorageBufferAddresses = true,
134    .QuadControlKHR = true,
135    .RayCullMaskKHR = true,
136    .RayQueryKHR = true,
137    .RayQueryPositionFetchKHR = true,
138    .RayTracingKHR = true,
139    .RayTracingPositionFetchKHR = true,
140    .RayTraversalPrimitiveCullingKHR = true,
141    .ReplicatedCompositesEXT = true,
142    .RoundingModeRTE = true,
143    .RoundingModeRTZ = true,
144    .RuntimeDescriptorArrayEXT = true,
145    .Sampled1D = true,
146    .SampledBuffer = true,
147    .SampledCubeArray = true,
148    .SampledImageArrayDynamicIndexing = true,
149    .SampledImageArrayNonUniformIndexingEXT = true,
150    .SampledRect = true,
151    .SampleMaskPostDepthCoverage = true,
152    .SampleRateShading = true,
153    .Shader = true,
154    .ShaderClockKHR = true,
155    .ShaderEnqueueAMDX = true,
156    .ShaderLayer = true,
157    .ShaderNonUniformEXT = true,
158    .ShaderSMBuiltinsNV = true,
159    .ShaderViewportIndex = true,
160    .ShaderViewportIndexLayerEXT = true,
161    .ShaderViewportMaskNV = true,
162    .SignedZeroInfNanPreserve = true,
163    .SparseResidency = true,
164    .StencilExportEXT = true,
165    .StorageBuffer8BitAccess = true,
166    .StorageBufferArrayDynamicIndexing = true,
167    .StorageBufferArrayNonUniformIndexingEXT = true,
168    .StorageImageArrayDynamicIndexing = true,
169    .StorageImageArrayNonUniformIndexingEXT = true,
170    .StorageImageExtendedFormats = true,
171    .StorageImageMultisample = true,
172    .StorageImageReadWithoutFormat = true,
173    .StorageImageWriteWithoutFormat = true,
174    .StorageInputOutput16 = true,
175    .StoragePushConstant8 = true,
176    .StoragePushConstant16 = true,
177    .StorageTexelBufferArrayDynamicIndexingEXT = true,
178    .StorageTexelBufferArrayNonUniformIndexingEXT = true,
179    .StorageUniform16 = true,
180    .StorageUniformBufferBlock16 = true,
181    .SubgroupBallotKHR = true,
182    .SubgroupBufferBlockIOINTEL = true,
183    .SubgroupShuffleINTEL = true,
184    .SubgroupVoteKHR = true,
185    .Tessellation = true,
186    .TessellationPointSize = true,
187    .TransformFeedback = true,
188    .UniformAndStorageBuffer8BitAccess = true,
189    .UniformBufferArrayDynamicIndexing = true,
190    .UniformBufferArrayNonUniformIndexingEXT = true,
191    .UniformTexelBufferArrayDynamicIndexingEXT = true,
192    .UniformTexelBufferArrayNonUniformIndexingEXT = true,
193    .VariablePointers = true,
194    .VariablePointersStorageBuffer = true,
195    .Vector16 = true,
196    .VulkanMemoryModel = true,
197    .VulkanMemoryModelDeviceScope = true,
198    .WorkgroupMemoryExplicitLayoutKHR = true,
199    .WorkgroupMemoryExplicitLayout8BitAccessKHR = true,
200    .WorkgroupMemoryExplicitLayout16BitAccessKHR = true,
201 };
202 
203 uint32_t mesa_spirv_debug = 0;
204 
205 static const struct debug_named_value mesa_spirv_debug_control[] = {
206    { "structured", MESA_SPIRV_DEBUG_STRUCTURED,
207      "Print information of the SPIR-V structured control flow parsing" },
208    { "values", MESA_SPIRV_DEBUG_VALUES,
209      "Print information of the SPIR-V values" },
210    { "asm", MESA_SPIRV_DEBUG_ASM, "Print the SPIR-V assembly" },
211    { "color", MESA_SPIRV_DEBUG_COLOR, "Debug in color, if available" },
212    DEBUG_NAMED_VALUE_END,
213 };
214 
215 DEBUG_GET_ONCE_FLAGS_OPTION(mesa_spirv_debug, "MESA_SPIRV_DEBUG", mesa_spirv_debug_control, 0)
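/* Example usage, assuming the usual comma-separated syntax accepted by the
 * Mesa debug-option parser: MESA_SPIRV_DEBUG=structured,asm enables the
 * structured control flow and assembly dumps listed above.
 */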
216 
217 /* DO NOT CALL THIS FUNCTION DIRECTLY. Use mesa_spirv_debug_init() instead */
218 static void
219 initialize_mesa_spirv_debug(void)
220 {
221    mesa_spirv_debug = debug_get_option_mesa_spirv_debug();
222 }
223 
224 static void
225 mesa_spirv_debug_init(void)
226 {
227    static once_flag initialized_debug_flag = ONCE_FLAG_INIT;
228    call_once(&initialized_debug_flag, initialize_mesa_spirv_debug);
229 }
230 
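/* In debug builds, the MESA_SPIRV_LOG_LEVEL environment variable ("error",
 * "warning" or "info") selects the lowest severity that vtn_log() also
 * echoes to stderr; when it is unset the default is "warning".
 */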
231 #ifndef NDEBUG
232 static enum nir_spirv_debug_level
233 vtn_default_log_level(void)
234 {
235    enum nir_spirv_debug_level level = NIR_SPIRV_DEBUG_LEVEL_WARNING;
236    const char *vtn_log_level_strings[] = {
237       [NIR_SPIRV_DEBUG_LEVEL_WARNING] = "warning",
238       [NIR_SPIRV_DEBUG_LEVEL_INFO]  = "info",
239       [NIR_SPIRV_DEBUG_LEVEL_ERROR] = "error",
240    };
241    const char *str = getenv("MESA_SPIRV_LOG_LEVEL");
242 
243    if (str == NULL)
244       return level;
245 
246    for (int i = 0; i < ARRAY_SIZE(vtn_log_level_strings); i++) {
247       if (strcasecmp(str, vtn_log_level_strings[i]) == 0) {
248          level = i;
249          break;
250       }
251    }
252 
253    return level;
254 }
255 #endif
256 
257 void
258 vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level,
259         size_t spirv_offset, const char *message)
260 {
261    if (b->options->debug.func) {
262       b->options->debug.func(b->options->debug.private_data,
263                              level, spirv_offset, message);
264    }
265 
266 #ifndef NDEBUG
267    static enum nir_spirv_debug_level default_level =
268       NIR_SPIRV_DEBUG_LEVEL_INVALID;
269 
270    if (default_level == NIR_SPIRV_DEBUG_LEVEL_INVALID)
271       default_level = vtn_default_log_level();
272 
273    if (level >= default_level)
274       fprintf(stderr, "%s\n", message);
275 #endif
276 }
277 
278 void
279 vtn_logf(struct vtn_builder *b, enum nir_spirv_debug_level level,
280          size_t spirv_offset, const char *fmt, ...)
281 {
282    va_list args;
283    char *msg;
284 
285    va_start(args, fmt);
286    msg = ralloc_vasprintf(NULL, fmt, args);
287    va_end(args);
288 
289    vtn_log(b, level, spirv_offset, msg);
290 
291    ralloc_free(msg);
292 }
293 
294 static void
295 vtn_log_err(struct vtn_builder *b,
296             enum nir_spirv_debug_level level, const char *prefix,
297             const char *file, unsigned line,
298             const char *fmt, va_list args)
299 {
300    char *msg;
301 
302    msg = ralloc_strdup(NULL, prefix);
303 
304 #ifndef NDEBUG
305    ralloc_asprintf_append(&msg, "    In file %s:%u\n", file, line);
306 #endif
307 
308    ralloc_asprintf_append(&msg, "    ");
309 
310    ralloc_vasprintf_append(&msg, fmt, args);
311 
312    ralloc_asprintf_append(&msg, "\n    %zu bytes into the SPIR-V binary",
313                           b->spirv_offset);
314 
315    if (b->file) {
316       ralloc_asprintf_append(&msg,
317                              "\n    in SPIR-V source file %s, line %d, col %d",
318                              b->file, b->line, b->col);
319    }
320 
321    vtn_log(b, level, b->spirv_offset, msg);
322 
323    ralloc_free(msg);
324 }
325 
326 static void
327 vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
328 {
329    static int idx = 0;
330 
331    char filename[1024];
332    int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv",
333                       path, prefix, idx++);
334    if (len < 0 || len >= sizeof(filename))
335       return;
336 
337    FILE *f = fopen(filename, "wb");
338    if (f == NULL)
339       return;
340 
341    fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f);
342    fclose(f);
343 
344    vtn_info("SPIR-V shader dumped to %s", filename);
345 }
346 
347 void
348 _vtn_warn(struct vtn_builder *b, const char *file, unsigned line,
349           const char *fmt, ...)
350 {
351    va_list args;
352 
353    va_start(args, fmt);
354    vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_WARNING, "SPIR-V WARNING:\n",
355                file, line, fmt, args);
356    va_end(args);
357 }
358 
359 void
360 _vtn_err(struct vtn_builder *b, const char *file, unsigned line,
361           const char *fmt, ...)
362 {
363    va_list args;
364 
365    va_start(args, fmt);
366    vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V ERROR:\n",
367                file, line, fmt, args);
368    va_end(args);
369 }
370 
371 void
372 _vtn_fail(struct vtn_builder *b, const char *file, unsigned line,
373           const char *fmt, ...)
374 {
375    va_list args;
376 
377    if (MESA_SPIRV_DEBUG(VALUES))
378       vtn_dump_values(b, stderr);
379 
380    va_start(args, fmt);
381    vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V parsing FAILED:\n",
382                file, line, fmt, args);
383    va_end(args);
384 
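   /* Optionally dump the failing binary for offline inspection: for example,
    * MESA_SPIRV_FAIL_DUMP_PATH=/tmp writes /tmp/fail-0.spirv, /tmp/fail-1.spirv,
    * and so on (see vtn_dump_shader() above).
    */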
385    const char *dump_path = secure_getenv("MESA_SPIRV_FAIL_DUMP_PATH");
386    if (dump_path)
387       vtn_dump_shader(b, dump_path, "fail");
388 
389 #ifndef NDEBUG
390    if (!b->options->skip_os_break_in_debug_build)
391       os_break();
392 #endif
393 
394    vtn_longjmp(b->fail_jump, 1);
395 }
396 
397 const char *
398 vtn_value_type_to_string(enum vtn_value_type t)
399 {
400 #define CASE(typ) case vtn_value_type_##typ: return #typ
401    switch (t) {
402    CASE(invalid);
403    CASE(undef);
404    CASE(string);
405    CASE(decoration_group);
406    CASE(type);
407    CASE(constant);
408    CASE(pointer);
409    CASE(function);
410    CASE(block);
411    CASE(ssa);
412    CASE(extension);
413    CASE(image_pointer);
414    }
415 #undef CASE
416    unreachable("unknown value type");
417    return "UNKNOWN";
418 }
419 
420 static const char *
421 vtn_base_type_to_string(enum vtn_base_type t)
422 {
423 #define CASE(typ) case vtn_base_type_##typ: return #typ
424    switch (t) {
425    CASE(void);
426    CASE(scalar);
427    CASE(vector);
428    CASE(matrix);
429    CASE(array);
430    CASE(struct);
431    CASE(pointer);
432    CASE(image);
433    CASE(sampler);
434    CASE(sampled_image);
435    CASE(accel_struct);
436    CASE(ray_query);
437    CASE(function);
438    CASE(event);
439    CASE(cooperative_matrix);
440    }
441 #undef CASE
442    unreachable("unknown base type");
443    return "UNKNOWN";
444 }
445 
446 
447 void
448 _vtn_fail_value_type_mismatch(struct vtn_builder *b, uint32_t value_id,
449                               enum vtn_value_type value_type)
450 {
451    struct vtn_value *val = vtn_untyped_value(b, value_id);
452    vtn_fail(
453       "SPIR-V id %u is the wrong kind of value: "
454       "expected '%s' but got '%s'",
455       vtn_id_for_value(b, val),
456       vtn_value_type_to_string(value_type),
457       vtn_value_type_to_string(val->value_type));
458 }
459 
460 void _vtn_fail_value_not_pointer(struct vtn_builder *b,
461                                  uint32_t value_id)
462 {
463    struct vtn_value *val = vtn_untyped_value(b, value_id);
464    vtn_fail("SPIR-V id %u is the wrong kind of value: "
465             "expected 'pointer' OR null constant but got "
466             "'%s' (%s)", value_id,
467             vtn_value_type_to_string(val->value_type),
468             val->is_null_constant ? "null constant" : "not null constant");
469 }
470 
471 static struct vtn_ssa_value *
472 vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
473 {
474    struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
475    val->type = glsl_get_bare_type(type);
476 
477    if (glsl_type_is_cmat(type)) {
478       nir_deref_instr *mat = vtn_create_cmat_temporary(b, type, "cmat_undef");
479       vtn_set_ssa_value_var(b, val, mat->var);
480    } else if (glsl_type_is_vector_or_scalar(type)) {
481       unsigned num_components = glsl_get_vector_elements(val->type);
482       unsigned bit_size = glsl_get_bit_size(val->type);
483       val->def = nir_undef(&b->nb, num_components, bit_size);
484    } else {
485       unsigned elems = glsl_get_length(val->type);
486       val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
487       if (glsl_type_is_array_or_matrix(type)) {
488          const struct glsl_type *elem_type = glsl_get_array_element(type);
489          for (unsigned i = 0; i < elems; i++)
490             val->elems[i] = vtn_undef_ssa_value(b, elem_type);
491       } else {
492          vtn_assert(glsl_type_is_struct_or_ifc(type));
493          for (unsigned i = 0; i < elems; i++) {
494             const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
495             val->elems[i] = vtn_undef_ssa_value(b, elem_type);
496          }
497       }
498    }
499 
500    return val;
501 }
502 
503 struct vtn_ssa_value *
504 vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
505                     const struct glsl_type *type)
506 {
507    struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
508    val->type = glsl_get_bare_type(type);
509 
510    if (glsl_type_is_cmat(type)) {
511       const struct glsl_type *element_type = glsl_get_cmat_element(type);
512 
513       nir_deref_instr *mat = vtn_create_cmat_temporary(b, type, "cmat_constant");
514       nir_cmat_construct(&b->nb, &mat->def,
515                          nir_build_imm(&b->nb, 1, glsl_get_bit_size(element_type),
516                                        constant->values));
517       vtn_set_ssa_value_var(b, val, mat->var);
518    } else if (glsl_type_is_vector_or_scalar(type)) {
519       val->def = nir_build_imm(&b->nb, glsl_get_vector_elements(val->type),
520                                glsl_get_bit_size(val->type),
521                                constant->values);
522    } else {
523       unsigned elems = glsl_get_length(val->type);
524       val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
525       if (glsl_type_is_array_or_matrix(type)) {
526          const struct glsl_type *elem_type = glsl_get_array_element(type);
527          for (unsigned i = 0; i < elems; i++) {
528             val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
529                                                 elem_type);
530          }
531       } else {
532          vtn_assert(glsl_type_is_struct_or_ifc(type));
533          for (unsigned i = 0; i < elems; i++) {
534             const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
535             val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
536                                                 elem_type);
537          }
538       }
539    }
540 
541    return val;
542 }
543 
544 struct vtn_ssa_value *
545 vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
546 {
547    struct vtn_value *val = vtn_untyped_value(b, value_id);
548    switch (val->value_type) {
549    case vtn_value_type_undef:
550       return vtn_undef_ssa_value(b, val->type->type);
551 
552    case vtn_value_type_constant:
553       return vtn_const_ssa_value(b, val->constant, val->type->type);
554 
555    case vtn_value_type_ssa:
556       return val->ssa;
557 
558    case vtn_value_type_pointer:
559       vtn_assert(val->pointer->type && val->pointer->type->type);
560       struct vtn_ssa_value *ssa =
561          vtn_create_ssa_value(b, val->pointer->type->type);
562       ssa->def = vtn_pointer_to_ssa(b, val->pointer);
563       return ssa;
564 
565    default:
566       vtn_fail("Invalid type for an SSA value");
567    }
568 }
569 
570 struct vtn_value *
571 vtn_push_ssa_value(struct vtn_builder *b, uint32_t value_id,
572                    struct vtn_ssa_value *ssa)
573 {
574    struct vtn_type *type = vtn_get_value_type(b, value_id);
575 
576    /* See vtn_create_ssa_value */
577    vtn_fail_if(ssa->type != glsl_get_bare_type(type->type),
578                "Type mismatch for SPIR-V value %%%u", value_id);
579 
580    struct vtn_value *val;
581    if (type->base_type == vtn_base_type_pointer) {
582       val = vtn_push_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type));
583    } else {
584       /* Don't trip the value_type_ssa check in vtn_push_value */
585       val = vtn_push_value(b, value_id, vtn_value_type_invalid);
586       val->value_type = vtn_value_type_ssa;
587       val->ssa = ssa;
588    }
589 
590    return val;
591 }
592 
593 nir_def *
594 vtn_get_nir_ssa(struct vtn_builder *b, uint32_t value_id)
595 {
596    struct vtn_ssa_value *ssa = vtn_ssa_value(b, value_id);
597    vtn_fail_if(!glsl_type_is_vector_or_scalar(ssa->type),
598                "Expected a vector or scalar type");
599    return ssa->def;
600 }
601 
602 struct vtn_value *
603 vtn_push_nir_ssa(struct vtn_builder *b, uint32_t value_id, nir_def *def)
604 {
605    /* Types for all SPIR-V SSA values are set as part of a pre-pass so the
606     * type will be valid by the time we get here.
607     */
608    struct vtn_type *type = vtn_get_value_type(b, value_id);
609    vtn_fail_if(def->num_components != glsl_get_vector_elements(type->type) ||
610                def->bit_size != glsl_get_bit_size(type->type),
611                "Mismatch between NIR and SPIR-V type.");
612    struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
613    ssa->def = def;
614    return vtn_push_ssa_value(b, value_id, ssa);
615 }
616 
617 nir_deref_instr *
618 vtn_get_deref_for_id(struct vtn_builder *b, uint32_t value_id)
619 {
620    return vtn_get_deref_for_ssa_value(b, vtn_ssa_value(b, value_id));
621 }
622 
623 nir_deref_instr *
624 vtn_get_deref_for_ssa_value(struct vtn_builder *b, struct vtn_ssa_value *ssa)
625 {
626    vtn_fail_if(!ssa->is_variable, "Expected an SSA value with a nir_variable");
627    return nir_build_deref_var(&b->nb, ssa->var);
628 }
629 
630 struct vtn_value *
631 vtn_push_var_ssa(struct vtn_builder *b, uint32_t value_id, nir_variable *var)
632 {
633    struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, var->type);
634    vtn_set_ssa_value_var(b, ssa, var);
635    return vtn_push_ssa_value(b, value_id, ssa);
636 }
637 
638 static enum gl_access_qualifier
639 spirv_to_gl_access_qualifier(struct vtn_builder *b,
640                              SpvAccessQualifier access_qualifier)
641 {
642    switch (access_qualifier) {
643    case SpvAccessQualifierReadOnly:
644       return ACCESS_NON_WRITEABLE;
645    case SpvAccessQualifierWriteOnly:
646       return ACCESS_NON_READABLE;
647    case SpvAccessQualifierReadWrite:
648       return 0;
649    default:
650       vtn_fail("Invalid image access qualifier");
651    }
652 }
653 
654 static nir_deref_instr *
655 vtn_get_image(struct vtn_builder *b, uint32_t value_id,
656               enum gl_access_qualifier *access)
657 {
658    struct vtn_type *type = vtn_get_value_type(b, value_id);
659    vtn_assert(type->base_type == vtn_base_type_image);
660    if (access)
661       *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier);
662    nir_variable_mode mode = glsl_type_is_image(type->glsl_image) ?
663                             nir_var_image : nir_var_uniform;
664    return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
665                                mode, type->glsl_image, 0);
666 }
667 
668 static void
669 vtn_push_image(struct vtn_builder *b, uint32_t value_id,
670                nir_deref_instr *deref, bool propagate_non_uniform)
671 {
672    struct vtn_type *type = vtn_get_value_type(b, value_id);
673    vtn_assert(type->base_type == vtn_base_type_image);
674    struct vtn_value *value = vtn_push_nir_ssa(b, value_id, &deref->def);
675    value->propagated_non_uniform = propagate_non_uniform;
676 }
677 
678 static nir_deref_instr *
679 vtn_get_sampler(struct vtn_builder *b, uint32_t value_id)
680 {
681    struct vtn_type *type = vtn_get_value_type(b, value_id);
682    vtn_assert(type->base_type == vtn_base_type_sampler);
683    return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
684                                nir_var_uniform, glsl_bare_sampler_type(), 0);
685 }
686 
687 nir_def *
688 vtn_sampled_image_to_nir_ssa(struct vtn_builder *b,
689                              struct vtn_sampled_image si)
690 {
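   /* A sampled image travels through NIR as a two-component vector holding
    * the image and sampler deref values; vtn_get_sampled_image() below
    * splits the channels back apart.
    */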
691    return nir_vec2(&b->nb, &si.image->def, &si.sampler->def);
692 }
693 
694 static void
695 vtn_push_sampled_image(struct vtn_builder *b, uint32_t value_id,
696                        struct vtn_sampled_image si, bool propagate_non_uniform)
697 {
698    struct vtn_type *type = vtn_get_value_type(b, value_id);
699    vtn_assert(type->base_type == vtn_base_type_sampled_image);
700    struct vtn_value *value = vtn_push_nir_ssa(b, value_id,
701                                               vtn_sampled_image_to_nir_ssa(b, si));
702    value->propagated_non_uniform = propagate_non_uniform;
703 }
704 
705 static struct vtn_sampled_image
706 vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id)
707 {
708    struct vtn_type *type = vtn_get_value_type(b, value_id);
709    vtn_assert(type->base_type == vtn_base_type_sampled_image);
710    nir_def *si_vec2 = vtn_get_nir_ssa(b, value_id);
711 
712    /* Even though this is a sampled image, we can end up here with a storage
713     * image because OpenCL doesn't distinguish between the two.
714     */
715    const struct glsl_type *image_type = type->image->glsl_image;
716    nir_variable_mode image_mode = glsl_type_is_image(image_type) ?
717                                   nir_var_image : nir_var_uniform;
718 
719    struct vtn_sampled_image si = { NULL, };
720    si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0),
721                                    image_mode, image_type, 0);
722    si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1),
723                                      nir_var_uniform,
724                                      glsl_bare_sampler_type(), 0);
725    return si;
726 }
727 
728 const char *
729 vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
730                    unsigned word_count, unsigned *words_used)
731 {
732    /* From the SPIR-V spec:
733     *
734     *    "A string is interpreted as a nul-terminated stream of characters.
735     *    The character set is Unicode in the UTF-8 encoding scheme. The UTF-8
736     *    octets (8-bit bytes) are packed four per word, following the
737     *    little-endian convention (i.e., the first octet is in the
738     *    lowest-order 8 bits of the word). The final word contains the
739     *    string’s nul-termination character (0), and all contents past the
740     *    end of the string in the final word are padded with 0."
741     *
742     * On big-endian, we need to byte-swap.
743     */
744 #if UTIL_ARCH_BIG_ENDIAN
745    {
746       uint32_t *copy = vtn_alloc_array(b, uint32_t, word_count);
747       for (unsigned i = 0; i < word_count; i++)
748          copy[i] = util_bswap32(words[i]);
749       words = copy;
750    }
751 #endif
752 
753    const char *str = (const char *)words;
754    const char *end = memchr(str, 0, word_count * 4);
755    vtn_fail_if(end == NULL, "String is not null-terminated");
756 
757    if (words_used)
758       *words_used = DIV_ROUND_UP(end - str + 1, sizeof(*words));
759 
760    return str;
761 }
762 
763 const uint32_t *
764 vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
765                         const uint32_t *end, vtn_instruction_handler handler)
766 {
767    const uint32_t *w = start;
768    while (w < end) {
769       SpvOp opcode = w[0] & SpvOpCodeMask;
770       unsigned count = w[0] >> SpvWordCountShift;
771       vtn_assert(count >= 1 && w + count <= end);
772 
773       b->spirv_offset = (uint8_t *)w - (uint8_t *)b->spirv;
774 
775       switch (opcode) {
776       case SpvOpNop:
777          break; /* Do nothing */
778 
779       case SpvOpLine:
780          b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
781          b->line = w[2];
782          b->col = w[3];
783          break;
784 
785       case SpvOpNoLine:
786          b->file = NULL;
787          b->line = -1;
788          b->col = -1;
789          break;
790 
791       default:
792          if (!handler(b, opcode, w, count))
793             return w;
794          break;
795       }
796 
797       w += count;
798    }
799 
800    assert(w == end);
801    return w;
802 }
803 
804 static bool
805 vtn_handle_non_semantic_instruction(struct vtn_builder *b, SpvOp ext_opcode,
806                                     const uint32_t *w, unsigned count)
807 {
808    /* Do nothing. */
809    return true;
810 }
811 
812 static bool
813 vtn_handle_non_semantic_debug_break_instruction(struct vtn_builder *b, SpvOp ext_opcode,
814                                                 const uint32_t *w, unsigned count)
815 {
816    nir_debug_break(&b->nb);
817    return true;
818 }
819 
820 static void
821 vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
822                      const uint32_t *w, unsigned count)
823 {
824    switch (opcode) {
825    case SpvOpExtInstImport: {
826       struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
827       const char *ext = vtn_string_literal(b, &w[2], count - 2, NULL);
828       if (strcmp(ext, "GLSL.std.450") == 0) {
829          val->ext_handler = vtn_handle_glsl450_instruction;
830       } else if ((strcmp(ext, "SPV_AMD_gcn_shader") == 0)
831                 && (b->options && b->options->amd_gcn_shader)) {
832          val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
833       } else if ((strcmp(ext, "SPV_AMD_shader_ballot") == 0)
834                 && (b->options && b->options->amd_shader_ballot)) {
835          val->ext_handler = vtn_handle_amd_shader_ballot_instruction;
836       } else if ((strcmp(ext, "SPV_AMD_shader_trinary_minmax") == 0)
837                 && (b->options && b->options->amd_trinary_minmax)) {
838          val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
839       } else if ((strcmp(ext, "SPV_AMD_shader_explicit_vertex_parameter") == 0)
840                 && (b->options && b->options->amd_shader_explicit_vertex_parameter)) {
841          val->ext_handler = vtn_handle_amd_shader_explicit_vertex_parameter_instruction;
842       } else if (strcmp(ext, "OpenCL.std") == 0) {
843          val->ext_handler = vtn_handle_opencl_instruction;
844       } else if ((strcmp(ext, "NonSemantic.DebugBreak") == 0)
845                 && (b->options && b->options->emit_debug_break)) {
846          val->ext_handler = vtn_handle_non_semantic_debug_break_instruction;
847       } else if (strstr(ext, "NonSemantic.") == ext) {
848          val->ext_handler = vtn_handle_non_semantic_instruction;
849       } else {
850          vtn_fail("Unsupported extension: %s", ext);
851       }
852       break;
853    }
854 
855    case SpvOpExtInst:
856    case SpvOpExtInstWithForwardRefsKHR: {
857       struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
858 
859       if (opcode == SpvOpExtInstWithForwardRefsKHR)
860          assert(val->ext_handler == vtn_handle_non_semantic_instruction);
861 
862       bool handled = val->ext_handler(b, w[4], w, count);
863       vtn_assert(handled);
864       break;
865    }
866 
867    default:
868       vtn_fail_with_opcode("Unhandled opcode", opcode);
869    }
870 }
871 
872 static void
873 _foreach_decoration_helper(struct vtn_builder *b,
874                            struct vtn_value *base_value,
875                            int parent_member,
876                            struct vtn_value *value,
877                            vtn_decoration_foreach_cb cb, void *data)
878 {
879    for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
880       int member;
881       if (dec->scope == VTN_DEC_DECORATION) {
882          member = parent_member;
883       } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
884          vtn_fail_if(value->value_type != vtn_value_type_type ||
885                      value->type->base_type != vtn_base_type_struct,
886                      "OpMemberDecorate and OpGroupMemberDecorate are only "
887                      "allowed on OpTypeStruct");
888          /* This means we haven't recursed yet */
889          assert(value == base_value);
890 
891          member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
892 
893          vtn_fail_if(member >= base_value->type->length,
894                      "OpMemberDecorate specifies member %d but the "
895                      "OpTypeStruct has only %u members",
896                      member, base_value->type->length);
897       } else {
898          /* Not a decoration */
899          assert(dec->scope == VTN_DEC_EXECUTION_MODE ||
900                 dec->scope <= VTN_DEC_STRUCT_MEMBER_NAME0);
901          continue;
902       }
903 
904       if (dec->group) {
905          assert(dec->group->value_type == vtn_value_type_decoration_group);
906          _foreach_decoration_helper(b, base_value, member, dec->group,
907                                     cb, data);
908       } else {
909          cb(b, base_value, member, dec, data);
910       }
911    }
912 }
913 
914 /** Iterates (recursively if needed) over all of the decorations on a value
915  *
916  * This function iterates over all of the decorations applied to a given
917  * value.  If it encounters a decoration group, it recurses into the group
918  * and iterates over all of those decorations as well.
919  */
920 void
921 vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
922                        vtn_decoration_foreach_cb cb, void *data)
923 {
924    _foreach_decoration_helper(b, value, -1, value, cb, data);
925 }
926 
927 void
928 vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
929                            vtn_execution_mode_foreach_cb cb, void *data)
930 {
931    for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
932       if (dec->scope != VTN_DEC_EXECUTION_MODE)
933          continue;
934 
935       assert(dec->group == NULL);
936       cb(b, value, dec, data);
937    }
938 }
939 
940 void
941 vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
942                       const uint32_t *w, unsigned count)
943 {
944    const uint32_t *w_end = w + count;
945    const uint32_t target = w[1];
946    w += 2;
947 
948    switch (opcode) {
949    case SpvOpDecorationGroup:
950       vtn_push_value(b, target, vtn_value_type_decoration_group);
951       break;
952 
953    case SpvOpDecorate:
954    case SpvOpDecorateId:
955    case SpvOpMemberDecorate:
956    case SpvOpDecorateString:
957    case SpvOpMemberDecorateString:
958    case SpvOpExecutionMode:
959    case SpvOpExecutionModeId: {
960       struct vtn_value *val = vtn_untyped_value(b, target);
961 
962       struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
963       switch (opcode) {
964       case SpvOpDecorate:
965       case SpvOpDecorateId:
966       case SpvOpDecorateString:
967          dec->scope = VTN_DEC_DECORATION;
968          break;
969       case SpvOpMemberDecorate:
970       case SpvOpMemberDecorateString:
971          dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
972          vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */
973                      "Member argument of OpMemberDecorate too large");
974          break;
975       case SpvOpExecutionMode:
976       case SpvOpExecutionModeId:
977          dec->scope = VTN_DEC_EXECUTION_MODE;
978          break;
979       default:
980          unreachable("Invalid decoration opcode");
981       }
982       dec->decoration = *(w++);
983       dec->num_operands = w_end - w;
984       dec->operands = w;
985 
986       /* Link into the list */
987       dec->next = val->decoration;
988       val->decoration = dec;
989       break;
990    }
991 
992    case SpvOpMemberName: {
993       struct vtn_value *val = vtn_untyped_value(b, target);
994       struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
995 
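      /* Member names are filed under scopes counting down from
       * VTN_DEC_STRUCT_MEMBER_NAME0, keeping them distinct from member
       * decorations, which count up from VTN_DEC_STRUCT_MEMBER0 (see
       * _foreach_decoration_helper() above).
       */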
996       dec->scope = VTN_DEC_STRUCT_MEMBER_NAME0 - *(w++);
997 
998       dec->member_name = vtn_string_literal(b, w, w_end - w, NULL);
999 
1000       dec->next = val->decoration;
1001       val->decoration = dec;
1002       break;
1003    }
1004 
1005    case SpvOpGroupMemberDecorate:
1006    case SpvOpGroupDecorate: {
1007       struct vtn_value *group =
1008          vtn_value(b, target, vtn_value_type_decoration_group);
1009 
1010       for (; w < w_end; w++) {
1011          struct vtn_value *val = vtn_untyped_value(b, *w);
1012          struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
1013 
1014          dec->group = group;
1015          if (opcode == SpvOpGroupDecorate) {
1016             dec->scope = VTN_DEC_DECORATION;
1017          } else {
1018             dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
1019             vtn_fail_if(dec->scope < 0, /* Check for overflow */
1020                         "Member argument of OpGroupMemberDecorate too large");
1021          }
1022 
1023          /* Link into the list */
1024          dec->next = val->decoration;
1025          val->decoration = dec;
1026       }
1027       break;
1028    }
1029 
1030    default:
1031       unreachable("Unhandled opcode");
1032    }
1033 }
1034 
1035 struct member_decoration_ctx {
1036    unsigned num_fields;
1037    struct glsl_struct_field *fields;
1038    struct vtn_type *type;
1039 };
1040 
1041 /**
1042  * Returns true if the given type contains a struct decorated Block or
1043  * BufferBlock
1044  */
1045 bool
1046 vtn_type_contains_block(struct vtn_builder *b, struct vtn_type *type)
1047 {
1048    switch (type->base_type) {
1049    case vtn_base_type_array:
1050       return vtn_type_contains_block(b, type->array_element);
1051    case vtn_base_type_struct:
1052       if (type->block || type->buffer_block)
1053          return true;
1054       for (unsigned i = 0; i < type->length; i++) {
1055          if (vtn_type_contains_block(b, type->members[i]))
1056             return true;
1057       }
1058       return false;
1059    default:
1060       return false;
1061    }
1062 }
1063 
1064 /** Returns true if two types are "compatible", i.e. you can do an OpLoad,
1065  * OpStore, or OpCopyMemory between them without breaking anything.
1066  * Technically, the SPIR-V rules require the exact same type ID but this lets
1067  * us internally be a bit looser.
1068  */
1069 bool
1070 vtn_types_compatible(struct vtn_builder *b,
1071                      struct vtn_type *t1, struct vtn_type *t2)
1072 {
1073    if (t1->id == t2->id)
1074       return true;
1075 
1076    if (t1->base_type != t2->base_type)
1077       return false;
1078 
1079    switch (t1->base_type) {
1080    case vtn_base_type_void:
1081    case vtn_base_type_scalar:
1082    case vtn_base_type_vector:
1083    case vtn_base_type_matrix:
1084    case vtn_base_type_image:
1085    case vtn_base_type_sampler:
1086    case vtn_base_type_sampled_image:
1087    case vtn_base_type_event:
1088    case vtn_base_type_cooperative_matrix:
1089       return t1->type == t2->type;
1090 
1091    case vtn_base_type_array:
1092       return t1->length == t2->length &&
1093              vtn_types_compatible(b, t1->array_element, t2->array_element);
1094 
1095    case vtn_base_type_pointer:
1096       return vtn_types_compatible(b, t1->pointed, t2->pointed);
1097 
1098    case vtn_base_type_struct:
1099       if (t1->length != t2->length)
1100          return false;
1101 
1102       for (unsigned i = 0; i < t1->length; i++) {
1103          if (!vtn_types_compatible(b, t1->members[i], t2->members[i]))
1104             return false;
1105       }
1106       return true;
1107 
1108    case vtn_base_type_accel_struct:
1109    case vtn_base_type_ray_query:
1110       return true;
1111 
1112    case vtn_base_type_function:
1113       /* This case shouldn't get hit since you can't copy around function
1114        * types.  Just require them to be identical.
1115        */
1116       return false;
1117    }
1118 
1119    vtn_fail("Invalid base type");
1120 }
1121 
1122 struct vtn_type *
1123 vtn_type_without_array(struct vtn_type *type)
1124 {
1125    while (type->base_type == vtn_base_type_array)
1126       type = type->array_element;
1127    return type;
1128 }
1129 
1130 /* does a shallow copy of a vtn_type */
1131 
1132 static struct vtn_type *
1133 vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
1134 {
1135    struct vtn_type *dest = vtn_alloc(b, struct vtn_type);
1136    *dest = *src;
1137 
1138    switch (src->base_type) {
1139    case vtn_base_type_void:
1140    case vtn_base_type_scalar:
1141    case vtn_base_type_vector:
1142    case vtn_base_type_matrix:
1143    case vtn_base_type_array:
1144    case vtn_base_type_pointer:
1145    case vtn_base_type_image:
1146    case vtn_base_type_sampler:
1147    case vtn_base_type_sampled_image:
1148    case vtn_base_type_event:
1149    case vtn_base_type_accel_struct:
1150    case vtn_base_type_ray_query:
1151    case vtn_base_type_cooperative_matrix:
1152       /* Nothing more to do */
1153       break;
1154 
1155    case vtn_base_type_struct:
1156       dest->members = vtn_alloc_array(b, struct vtn_type *, src->length);
1157       memcpy(dest->members, src->members,
1158              src->length * sizeof(src->members[0]));
1159 
1160       dest->offsets = vtn_alloc_array(b, unsigned, src->length);
1161       memcpy(dest->offsets, src->offsets,
1162              src->length * sizeof(src->offsets[0]));
1163       break;
1164 
1165    case vtn_base_type_function:
1166       dest->params = vtn_alloc_array(b, struct vtn_type *, src->length);
1167       memcpy(dest->params, src->params, src->length * sizeof(src->params[0]));
1168       break;
1169    }
1170 
1171    return dest;
1172 }
1173 
1174 static bool
1175 vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type,
1176                                enum vtn_variable_mode mode)
1177 {
1178    /* For OpenCL we never want to strip the info from the types, and it makes
1179     * type comparisons easier in later stages.
1180     */
1181    if (b->options->environment == NIR_SPIRV_OPENCL)
1182       return true;
1183 
1184    switch (mode) {
1185    case vtn_variable_mode_input:
1186    case vtn_variable_mode_output:
1187       /* Layout decorations kept because we need offsets for XFB arrays of
1188        * blocks.
1189        */
1190       return b->shader->info.has_transform_feedback_varyings;
1191 
1192    case vtn_variable_mode_ssbo:
1193    case vtn_variable_mode_phys_ssbo:
1194    case vtn_variable_mode_ubo:
1195    case vtn_variable_mode_push_constant:
1196    case vtn_variable_mode_shader_record:
1197       return true;
1198 
1199    case vtn_variable_mode_workgroup:
1200       return b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR;
1201 
1202    default:
1203       return false;
1204    }
1205 }
1206 
1207 const struct glsl_type *
1208 vtn_type_get_nir_type(struct vtn_builder *b, struct vtn_type *type,
1209                       enum vtn_variable_mode mode)
1210 {
1211    if (mode == vtn_variable_mode_atomic_counter) {
1212       vtn_fail_if(glsl_without_array(type->type) != glsl_uint_type(),
1213                   "Variables in the AtomicCounter storage class should be "
1214                   "(possibly arrays of arrays of) uint.");
1215       return glsl_type_wrap_in_arrays(glsl_atomic_uint_type(), type->type);
1216    }
1217 
1218    if (mode == vtn_variable_mode_uniform) {
1219       switch (type->base_type) {
1220       case vtn_base_type_array: {
1221          const struct glsl_type *elem_type =
1222             vtn_type_get_nir_type(b, type->array_element, mode);
1223 
1224          return glsl_array_type(elem_type, type->length,
1225                                 glsl_get_explicit_stride(type->type));
1226       }
1227 
1228       case vtn_base_type_struct: {
1229          bool need_new_struct = false;
1230          const uint32_t num_fields = type->length;
1231          NIR_VLA(struct glsl_struct_field, fields, num_fields);
1232          for (unsigned i = 0; i < num_fields; i++) {
1233             fields[i] = *glsl_get_struct_field_data(type->type, i);
1234             const struct glsl_type *field_nir_type =
1235                vtn_type_get_nir_type(b, type->members[i], mode);
1236             if (fields[i].type != field_nir_type) {
1237                fields[i].type = field_nir_type;
1238                need_new_struct = true;
1239             }
1240          }
1241          if (need_new_struct) {
1242             if (glsl_type_is_interface(type->type)) {
1243                return glsl_interface_type(fields, num_fields,
1244                                           /* packing */ 0, false,
1245                                           glsl_get_type_name(type->type));
1246             } else {
1247                return glsl_struct_type(fields, num_fields,
1248                                        glsl_get_type_name(type->type),
1249                                        glsl_struct_type_is_packed(type->type));
1250             }
1251          } else {
1252             /* No changes, just pass it on */
1253             return type->type;
1254          }
1255       }
1256 
1257       case vtn_base_type_image:
1258          vtn_assert(glsl_type_is_texture(type->glsl_image));
1259          return type->glsl_image;
1260 
1261       case vtn_base_type_sampler:
1262          return glsl_bare_sampler_type();
1263 
1264       case vtn_base_type_sampled_image:
1265          return glsl_texture_type_to_sampler(type->image->glsl_image,
1266                                              false /* is_shadow */);
1267 
1268       default:
1269          return type->type;
1270       }
1271    }
1272 
1273    if (mode == vtn_variable_mode_image) {
1274       struct vtn_type *image_type = vtn_type_without_array(type);
1275       vtn_assert(image_type->base_type == vtn_base_type_image);
1276       return glsl_type_wrap_in_arrays(image_type->glsl_image, type->type);
1277    }
1278 
1279    /* Layout decorations are allowed but ignored in certain conditions,
1280     * to allow SPIR-V generators to perform type deduplication.  Discard
1281     * unnecessary ones when passing to NIR.
1282     */
1283    if (!vtn_type_needs_explicit_layout(b, type, mode))
1284       return glsl_get_bare_type(type->type);
1285 
1286    return type->type;
1287 }
1288 
1289 static struct vtn_type *
1290 mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
1291 {
1292    type->members[member] = vtn_type_copy(b, type->members[member]);
1293    type = type->members[member];
1294 
1295    /* We may have an array of matrices.... Oh, joy! */
1296    while (glsl_type_is_array(type->type)) {
1297       type->array_element = vtn_type_copy(b, type->array_element);
1298       type = type->array_element;
1299    }
1300 
1301    vtn_assert(glsl_type_is_matrix(type->type));
1302 
1303    return type;
1304 }
1305 
1306 static void
1307 vtn_handle_access_qualifier(struct vtn_builder *b, struct vtn_type *type,
1308                             int member, enum gl_access_qualifier access)
1309 {
1310    type->members[member] = vtn_type_copy(b, type->members[member]);
1311    type = type->members[member];
1312 
1313    type->access |= access;
1314 }
1315 
1316 static void
1317 array_stride_decoration_cb(struct vtn_builder *b,
1318                            struct vtn_value *val, int member,
1319                            const struct vtn_decoration *dec, void *void_ctx)
1320 {
1321    struct vtn_type *type = val->type;
1322 
1323    if (dec->decoration == SpvDecorationArrayStride) {
1324       if (vtn_type_contains_block(b, type)) {
1325          vtn_warn("The ArrayStride decoration cannot be applied to an array "
1326                   "type which contains a structure type decorated Block "
1327                   "or BufferBlock");
1328          /* Ignore the decoration */
1329       } else {
1330          vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero");
1331          type->stride = dec->operands[0];
1332       }
1333    }
1334 }
1335 
1336 static void
1337 struct_member_decoration_cb(struct vtn_builder *b,
1338                             UNUSED struct vtn_value *val, int member,
1339                             const struct vtn_decoration *dec, void *void_ctx)
1340 {
1341    struct member_decoration_ctx *ctx = void_ctx;
1342 
1343    if (member < 0)
1344       return;
1345 
1346    assert(member < ctx->num_fields);
1347 
1348    switch (dec->decoration) {
1349    case SpvDecorationRelaxedPrecision:
1350    case SpvDecorationUniform:
1351    case SpvDecorationUniformId:
1352       break; /* FIXME: Do nothing with this for now. */
1353    case SpvDecorationNonWritable:
1354       vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_WRITEABLE);
1355       break;
1356    case SpvDecorationNonReadable:
1357       vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_READABLE);
1358       break;
1359    case SpvDecorationVolatile:
1360       vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_VOLATILE);
1361       break;
1362    case SpvDecorationCoherent:
1363       vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_COHERENT);
1364       break;
1365    case SpvDecorationNoPerspective:
1366       ctx->fields[member].interpolation = INTERP_MODE_NOPERSPECTIVE;
1367       break;
1368    case SpvDecorationFlat:
1369       ctx->fields[member].interpolation = INTERP_MODE_FLAT;
1370       break;
1371    case SpvDecorationExplicitInterpAMD:
1372       ctx->fields[member].interpolation = INTERP_MODE_EXPLICIT;
1373       break;
1374    case SpvDecorationCentroid:
1375       ctx->fields[member].centroid = true;
1376       break;
1377    case SpvDecorationSample:
1378       ctx->fields[member].sample = true;
1379       break;
1380    case SpvDecorationStream:
1381       /* This is handled later by var_decoration_cb in vtn_variables.c */
1382       break;
1383    case SpvDecorationLocation:
1384       ctx->fields[member].location = dec->operands[0];
1385       break;
1386    case SpvDecorationComponent:
1387       break; /* FIXME: What should we do with these? */
1388    case SpvDecorationBuiltIn:
1389       ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
1390       ctx->type->members[member]->is_builtin = true;
1391       ctx->type->members[member]->builtin = dec->operands[0];
1392       ctx->type->builtin_block = true;
1393       break;
1394    case SpvDecorationOffset:
1395       ctx->type->offsets[member] = dec->operands[0];
1396       ctx->fields[member].offset = dec->operands[0];
1397       break;
1398    case SpvDecorationMatrixStride:
1399       /* Handled as a second pass */
1400       break;
1401    case SpvDecorationColMajor:
1402       break; /* Nothing to do here.  Column-major is the default. */
1403    case SpvDecorationRowMajor:
1404       mutable_matrix_member(b, ctx->type, member)->row_major = true;
1405       break;
1406 
1407    case SpvDecorationPatch:
1408    case SpvDecorationPerPrimitiveNV:
1409    case SpvDecorationPerTaskNV:
1410    case SpvDecorationPerViewNV:
1411       break;
1412 
1413    case SpvDecorationSpecId:
1414    case SpvDecorationBlock:
1415    case SpvDecorationBufferBlock:
1416    case SpvDecorationArrayStride:
1417    case SpvDecorationGLSLShared:
1418    case SpvDecorationGLSLPacked:
1419    case SpvDecorationAliased:
1420    case SpvDecorationConstant:
1421    case SpvDecorationIndex:
1422    case SpvDecorationBinding:
1423    case SpvDecorationDescriptorSet:
1424    case SpvDecorationLinkageAttributes:
1425    case SpvDecorationNoContraction:
1426    case SpvDecorationInputAttachmentIndex:
1427    case SpvDecorationCPacked:
1428       vtn_warn("Decoration not allowed on struct members: %s",
1429                spirv_decoration_to_string(dec->decoration));
1430       break;
1431 
1432    case SpvDecorationRestrict:
1433       /* While "Restrict" is invalid for struct members, glslang incorrectly
1434        * generates it and it ends up hiding actual driver issues in a wall of
1435        * spam from deqp-vk.  Return it to the above block once the issue is
1436        * resolved.  https://github.com/KhronosGroup/glslang/issues/703
1437        */
1438       break;
1439 
1440    case SpvDecorationInvariant:
1441       /* Also incorrectly generated by glslang, ignore it. */
1442       break;
1443 
1444    case SpvDecorationXfbBuffer:
1445    case SpvDecorationXfbStride:
1446       /* This is handled later by var_decoration_cb in vtn_variables.c */
1447       break;
1448 
1449    case SpvDecorationSaturatedConversion:
1450    case SpvDecorationFuncParamAttr:
1451    case SpvDecorationFPRoundingMode:
1452    case SpvDecorationAlignment:
1453       if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1454          vtn_warn("Decoration only allowed for CL-style kernels: %s",
1455                   spirv_decoration_to_string(dec->decoration));
1456       }
1457       break;
1458 
1459    case SpvDecorationFPFastMathMode:
1460       /* See handle_fp_fast_math(). */
1461       break;
1462 
1463    case SpvDecorationUserSemantic:
1464    case SpvDecorationUserTypeGOOGLE:
1465       /* User semantic decorations can safely be ignored by the driver. */
1466       break;
1467 
1468    default:
1469       vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1470    }
1471 }
1472 
1473 /** Chases the array type all the way down to the tail and rewrites the
1474  * glsl_types to be based off the tail's glsl_type.
1475  */
1476 static void
1477 vtn_array_type_rewrite_glsl_type(struct vtn_type *type)
1478 {
1479    if (type->base_type != vtn_base_type_array)
1480       return;
1481 
1482    vtn_array_type_rewrite_glsl_type(type->array_element);
1483 
1484    type->type = glsl_array_type(type->array_element->type,
1485                                 type->length, type->stride);
1486 }
1487 
1488 /* Matrix strides are handled as a separate pass because we need to know
1489  * whether the matrix is row-major or not first.
1490  */
1491 static void
1492 struct_member_matrix_stride_cb(struct vtn_builder *b,
1493                                UNUSED struct vtn_value *val, int member,
1494                                const struct vtn_decoration *dec,
1495                                void *void_ctx)
1496 {
1497    if (dec->decoration != SpvDecorationMatrixStride)
1498       return;
1499 
1500    vtn_fail_if(member < 0,
1501                "The MatrixStride decoration is only allowed on members "
1502                "of OpTypeStruct");
1503    vtn_fail_if(dec->operands[0] == 0, "MatrixStride must be non-zero");
1504 
1505    struct member_decoration_ctx *ctx = void_ctx;
1506 
1507    struct vtn_type *mat_type = mutable_matrix_member(b, ctx->type, member);
1508    if (mat_type->row_major) {
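           /* Copy the vector type first since it may be shared with other matrix
            * types.  The declared MatrixStride becomes the element stride while
            * the matrix itself keeps the vector's natural stride.
            */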
1509       mat_type->array_element = vtn_type_copy(b, mat_type->array_element);
1510       mat_type->stride = mat_type->array_element->stride;
1511       mat_type->array_element->stride = dec->operands[0];
1512 
1513       mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1514                                                  dec->operands[0], true);
1515       mat_type->array_element->type = glsl_get_column_type(mat_type->type);
1516    } else {
1517       vtn_assert(mat_type->array_element->stride > 0);
1518       mat_type->stride = dec->operands[0];
1519 
1520       mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1521                                                  dec->operands[0], false);
1522    }
1523 
1524    /* Now that we've replaced the glsl_type with a properly strided matrix
1525     * type, rewrite the member type so that it's an array of the proper kind
1526     * of glsl_type.
1527     */
1528    vtn_array_type_rewrite_glsl_type(ctx->type->members[member]);
1529    ctx->fields[member].type = ctx->type->members[member]->type;
1530 }
1531 
1532 static void
1533 struct_packed_decoration_cb(struct vtn_builder *b,
1534                             struct vtn_value *val, int member,
1535                             const struct vtn_decoration *dec, void *void_ctx)
1536 {
1537    vtn_assert(val->type->base_type == vtn_base_type_struct);
1538    if (dec->decoration == SpvDecorationCPacked) {
1539       if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1540          vtn_warn("Decoration only allowed for CL-style kernels: %s",
1541                   spirv_decoration_to_string(dec->decoration));
1542       }
1543       val->type->packed = true;
1544    }
1545 }
1546 
1547 static void
1548 struct_block_decoration_cb(struct vtn_builder *b,
1549                            struct vtn_value *val, int member,
1550                            const struct vtn_decoration *dec, void *ctx)
1551 {
1552    if (member != -1)
1553       return;
1554 
1555    struct vtn_type *type = val->type;
1556    if (dec->decoration == SpvDecorationBlock)
1557       type->block = true;
1558    else if (dec->decoration == SpvDecorationBufferBlock)
1559       type->buffer_block = true;
1560 }
1561 
1562 static void
1563 type_decoration_cb(struct vtn_builder *b,
1564                    struct vtn_value *val, int member,
1565                    const struct vtn_decoration *dec, UNUSED void *ctx)
1566 {
1567    struct vtn_type *type = val->type;
1568 
1569    if (member != -1) {
1570       /* This should have been handled by OpTypeStruct */
1571       assert(val->type->base_type == vtn_base_type_struct);
1572       assert(member >= 0 && member < val->type->length);
1573       return;
1574    }
1575 
1576    switch (dec->decoration) {
1577    case SpvDecorationArrayStride:
1578       vtn_assert(type->base_type == vtn_base_type_array ||
1579                  type->base_type == vtn_base_type_pointer);
1580       break;
1581    case SpvDecorationBlock:
1582       vtn_assert(type->base_type == vtn_base_type_struct);
1583       vtn_assert(type->block);
1584       break;
1585    case SpvDecorationBufferBlock:
1586       vtn_assert(type->base_type == vtn_base_type_struct);
1587       vtn_assert(type->buffer_block);
1588       break;
1589    case SpvDecorationGLSLShared:
1590    case SpvDecorationGLSLPacked:
1591       /* Ignore these, since we get explicit offsets anyways */
1592       break;
1593 
1594    case SpvDecorationRowMajor:
1595    case SpvDecorationColMajor:
1596    case SpvDecorationMatrixStride:
1597    case SpvDecorationBuiltIn:
1598    case SpvDecorationNoPerspective:
1599    case SpvDecorationFlat:
1600    case SpvDecorationPatch:
1601    case SpvDecorationCentroid:
1602    case SpvDecorationSample:
1603    case SpvDecorationExplicitInterpAMD:
1604    case SpvDecorationVolatile:
1605    case SpvDecorationCoherent:
1606    case SpvDecorationNonWritable:
1607    case SpvDecorationNonReadable:
1608    case SpvDecorationUniform:
1609    case SpvDecorationUniformId:
1610    case SpvDecorationLocation:
1611    case SpvDecorationComponent:
1612    case SpvDecorationOffset:
1613    case SpvDecorationXfbBuffer:
1614    case SpvDecorationXfbStride:
1615    case SpvDecorationUserSemantic:
1616       vtn_warn("Decoration only allowed for struct members: %s",
1617                spirv_decoration_to_string(dec->decoration));
1618       break;
1619 
1620    case SpvDecorationStream:
1621       /* We don't need to do anything here, as the stream is filled in when
1622        * applying the decoration to a variable.  Just check that, since this is
1623        * not a struct member, the decorated type is a struct.
1624        */
1625       vtn_assert(type->base_type == vtn_base_type_struct);
1626       break;
1627 
1628    case SpvDecorationRelaxedPrecision:
1629    case SpvDecorationSpecId:
1630    case SpvDecorationInvariant:
1631    case SpvDecorationRestrict:
1632    case SpvDecorationAliased:
1633    case SpvDecorationConstant:
1634    case SpvDecorationIndex:
1635    case SpvDecorationBinding:
1636    case SpvDecorationDescriptorSet:
1637    case SpvDecorationLinkageAttributes:
1638    case SpvDecorationNoContraction:
1639    case SpvDecorationInputAttachmentIndex:
1640       vtn_warn("Decoration not allowed on types: %s",
1641                spirv_decoration_to_string(dec->decoration));
1642       break;
1643 
1644    case SpvDecorationCPacked:
1645       /* Handled when parsing a struct type, nothing to do here. */
1646       break;
1647 
1648    case SpvDecorationSaturatedConversion:
1649    case SpvDecorationFuncParamAttr:
1650    case SpvDecorationFPRoundingMode:
1651    case SpvDecorationAlignment:
1652       vtn_warn("Decoration only allowed for CL-style kernels: %s",
1653                spirv_decoration_to_string(dec->decoration));
1654       break;
1655 
1656    case SpvDecorationFPFastMathMode:
1657       /* See handle_fp_fast_math(). */
1658       break;
1659 
1660    case SpvDecorationUserTypeGOOGLE:
1661       /* User semantic decorations can safely be ignored by the driver. */
1662       break;
1663 
1664    default:
1665       vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1666    }
1667 }
1668 
1669 static unsigned
1670 translate_image_format(struct vtn_builder *b, SpvImageFormat format)
1671 {
1672    switch (format) {
1673    case SpvImageFormatUnknown:      return PIPE_FORMAT_NONE;
1674    case SpvImageFormatRgba32f:      return PIPE_FORMAT_R32G32B32A32_FLOAT;
1675    case SpvImageFormatRgba16f:      return PIPE_FORMAT_R16G16B16A16_FLOAT;
1676    case SpvImageFormatR32f:         return PIPE_FORMAT_R32_FLOAT;
1677    case SpvImageFormatRgba8:        return PIPE_FORMAT_R8G8B8A8_UNORM;
1678    case SpvImageFormatRgba8Snorm:   return PIPE_FORMAT_R8G8B8A8_SNORM;
1679    case SpvImageFormatRg32f:        return PIPE_FORMAT_R32G32_FLOAT;
1680    case SpvImageFormatRg16f:        return PIPE_FORMAT_R16G16_FLOAT;
1681    case SpvImageFormatR11fG11fB10f: return PIPE_FORMAT_R11G11B10_FLOAT;
1682    case SpvImageFormatR16f:         return PIPE_FORMAT_R16_FLOAT;
1683    case SpvImageFormatRgba16:       return PIPE_FORMAT_R16G16B16A16_UNORM;
1684    case SpvImageFormatRgb10A2:      return PIPE_FORMAT_R10G10B10A2_UNORM;
1685    case SpvImageFormatRg16:         return PIPE_FORMAT_R16G16_UNORM;
1686    case SpvImageFormatRg8:          return PIPE_FORMAT_R8G8_UNORM;
1687    case SpvImageFormatR16:          return PIPE_FORMAT_R16_UNORM;
1688    case SpvImageFormatR8:           return PIPE_FORMAT_R8_UNORM;
1689    case SpvImageFormatRgba16Snorm:  return PIPE_FORMAT_R16G16B16A16_SNORM;
1690    case SpvImageFormatRg16Snorm:    return PIPE_FORMAT_R16G16_SNORM;
1691    case SpvImageFormatRg8Snorm:     return PIPE_FORMAT_R8G8_SNORM;
1692    case SpvImageFormatR16Snorm:     return PIPE_FORMAT_R16_SNORM;
1693    case SpvImageFormatR8Snorm:      return PIPE_FORMAT_R8_SNORM;
1694    case SpvImageFormatRgba32i:      return PIPE_FORMAT_R32G32B32A32_SINT;
1695    case SpvImageFormatRgba16i:      return PIPE_FORMAT_R16G16B16A16_SINT;
1696    case SpvImageFormatRgba8i:       return PIPE_FORMAT_R8G8B8A8_SINT;
1697    case SpvImageFormatR32i:         return PIPE_FORMAT_R32_SINT;
1698    case SpvImageFormatRg32i:        return PIPE_FORMAT_R32G32_SINT;
1699    case SpvImageFormatRg16i:        return PIPE_FORMAT_R16G16_SINT;
1700    case SpvImageFormatRg8i:         return PIPE_FORMAT_R8G8_SINT;
1701    case SpvImageFormatR16i:         return PIPE_FORMAT_R16_SINT;
1702    case SpvImageFormatR8i:          return PIPE_FORMAT_R8_SINT;
1703    case SpvImageFormatRgba32ui:     return PIPE_FORMAT_R32G32B32A32_UINT;
1704    case SpvImageFormatRgba16ui:     return PIPE_FORMAT_R16G16B16A16_UINT;
1705    case SpvImageFormatRgba8ui:      return PIPE_FORMAT_R8G8B8A8_UINT;
1706    case SpvImageFormatR32ui:        return PIPE_FORMAT_R32_UINT;
1707    case SpvImageFormatRgb10a2ui:    return PIPE_FORMAT_R10G10B10A2_UINT;
1708    case SpvImageFormatRg32ui:       return PIPE_FORMAT_R32G32_UINT;
1709    case SpvImageFormatRg16ui:       return PIPE_FORMAT_R16G16_UINT;
1710    case SpvImageFormatRg8ui:        return PIPE_FORMAT_R8G8_UINT;
1711    case SpvImageFormatR16ui:        return PIPE_FORMAT_R16_UINT;
1712    case SpvImageFormatR8ui:         return PIPE_FORMAT_R8_UINT;
1713    case SpvImageFormatR64ui:        return PIPE_FORMAT_R64_UINT;
1714    case SpvImageFormatR64i:         return PIPE_FORMAT_R64_SINT;
1715    default:
1716       vtn_fail("Invalid image format: %s (%u)",
1717                spirv_imageformat_to_string(format), format);
1718    }
1719 }
1720 
1721 static void
1722 validate_image_type_for_sampled_image(struct vtn_builder *b,
1723                                       const struct glsl_type *image_type,
1724                                       const char *operand)
1725 {
1726    /* From OpTypeSampledImage description in SPIR-V 1.6, revision 1:
1727     *
1728     *   Image Type must be an OpTypeImage. It is the type of the image in the
1729     *   combined sampler and image type. It must not have a Dim of
1730     *   SubpassData. Additionally, starting with version 1.6, it must not have
1731     *   a Dim of Buffer.
1732     *
1733     * Same also applies to the type of the Image operand in OpSampledImage.
1734     */
1735 
1736    const enum glsl_sampler_dim dim = glsl_get_sampler_dim(image_type);
1737 
1738    vtn_fail_if(dim == GLSL_SAMPLER_DIM_SUBPASS ||
1739                dim == GLSL_SAMPLER_DIM_SUBPASS_MS,
1740                "%s must not have a Dim of SubpassData.", operand);
1741 
1742    if (dim == GLSL_SAMPLER_DIM_BUF) {
1743       if (b->version >= 0x10600) {
1744          vtn_fail("Starting with SPIR-V 1.6, %s "
1745                   "must not have a Dim of Buffer.", operand);
1746       } else {
1747          vtn_warn("%s should not have a Dim of Buffer.", operand);
1748       }
1749    }
1750 }
1751 
1752 static void
1753 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
1754                 const uint32_t *w, unsigned count)
1755 {
1756    struct vtn_value *val = NULL;
1757 
1758    /* In order to properly handle forward declarations, we have to defer
1759     * allocation for pointer types.
1760     */
1761    if (opcode != SpvOpTypePointer && opcode != SpvOpTypeForwardPointer) {
1762       val = vtn_push_value(b, w[1], vtn_value_type_type);
1763       vtn_fail_if(val->type != NULL,
1764                   "Only pointers can have forward declarations");
1765       val->type = vtn_zalloc(b, struct vtn_type);
1766       val->type->id = w[1];
1767    }
1768 
1769    switch (opcode) {
1770    case SpvOpTypeVoid:
1771       val->type->base_type = vtn_base_type_void;
1772       val->type->type = glsl_void_type();
1773       break;
1774    case SpvOpTypeBool:
1775       val->type->base_type = vtn_base_type_scalar;
1776       val->type->type = glsl_bool_type();
1777       val->type->length = 1;
1778       break;
1779    case SpvOpTypeInt: {
1780       int bit_size = w[2];
1781       const bool signedness = w[3];
1782       vtn_fail_if(bit_size != 8 && bit_size != 16 &&
1783                   bit_size != 32 && bit_size != 64,
1784                   "Invalid int bit size: %u", bit_size);
1785       val->type->base_type = vtn_base_type_scalar;
1786       val->type->type = signedness ? glsl_intN_t_type(bit_size) :
1787                                      glsl_uintN_t_type(bit_size);
1788       val->type->length = 1;
1789       break;
1790    }
1791 
1792    case SpvOpTypeFloat: {
1793       int bit_size = w[2];
1794       val->type->base_type = vtn_base_type_scalar;
1795       vtn_fail_if(bit_size != 16 && bit_size != 32 && bit_size != 64,
1796                   "Invalid float bit size: %u", bit_size);
1797       val->type->type = glsl_floatN_t_type(bit_size);
1798       val->type->length = 1;
1799       break;
1800    }
1801 
1802    case SpvOpTypeVector: {
1803       struct vtn_type *base = vtn_get_type(b, w[2]);
1804       unsigned elems = w[3];
1805 
1806       vtn_fail_if(base->base_type != vtn_base_type_scalar,
1807                   "Base type for OpTypeVector must be a scalar");
1808       vtn_fail_if((elems < 2 || elems > 4) && (elems != 8) && (elems != 16),
1809                   "Invalid component count for OpTypeVector");
1810 
1811       val->type->base_type = vtn_base_type_vector;
1812       val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
1813       val->type->length = elems;
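           /* Booleans have no well-defined size in memory, so assume a 4-byte
            * stride for them; other scalar types use their bit size in bytes.
            */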
1814       val->type->stride = glsl_type_is_boolean(val->type->type)
1815          ? 4 : glsl_get_bit_size(base->type) / 8;
1816       val->type->array_element = base;
1817       break;
1818    }
1819 
1820    case SpvOpTypeMatrix: {
1821       struct vtn_type *base = vtn_get_type(b, w[2]);
1822       unsigned columns = w[3];
1823 
1824       vtn_fail_if(base->base_type != vtn_base_type_vector,
1825                   "Base type for OpTypeMatrix must be a vector");
1826       vtn_fail_if(columns < 2 || columns > 4,
1827                   "Invalid column count for OpTypeMatrix");
1828 
1829       val->type->base_type = vtn_base_type_matrix;
1830       val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
1831                                          glsl_get_vector_elements(base->type),
1832                                          columns);
1833       vtn_fail_if(glsl_type_is_error(val->type->type),
1834                   "Unsupported base type for OpTypeMatrix");
1835       assert(!glsl_type_is_error(val->type->type));
1836       val->type->length = columns;
1837       val->type->array_element = base;
1838       val->type->row_major = false;
1839       val->type->stride = 0;
1840       break;
1841    }
1842 
1843    case SpvOpTypeRuntimeArray:
1844    case SpvOpTypeArray: {
1845       struct vtn_type *array_element = vtn_get_type(b, w[2]);
1846 
1847       if (opcode == SpvOpTypeRuntimeArray) {
1848          /* A length of 0 is used to denote unsized arrays */
1849          val->type->length = 0;
1850       } else {
1851          val->type->length = vtn_constant_uint(b, w[3]);
1852       }
1853 
1854       val->type->base_type = vtn_base_type_array;
1855       val->type->array_element = array_element;
1856 
1857       vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1858       val->type->type = glsl_array_type(array_element->type, val->type->length,
1859                                         val->type->stride);
1860       break;
1861    }
1862 
1863    case SpvOpTypeStruct: {
1864       unsigned num_fields = count - 2;
1865       val->type->base_type = vtn_base_type_struct;
1866       val->type->length = num_fields;
1867       val->type->members = vtn_alloc_array(b, struct vtn_type *, num_fields);
1868       val->type->offsets = vtn_alloc_array(b, unsigned, num_fields);
1869       val->type->packed = false;
1870 
1871       NIR_VLA(struct glsl_struct_field, fields, count);
1872       for (unsigned i = 0; i < num_fields; i++) {
1873          val->type->members[i] = vtn_get_type(b, w[i + 2]);
1874          const char *name = NULL;
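              /* Member names (e.g. from OpMemberName) are stored as decorations
               * whose scope is VTN_DEC_STRUCT_MEMBER_NAME0 - i, so scan the
               * decoration list for this member's entry.
               */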
1875          for (struct vtn_decoration *dec = val->decoration; dec; dec = dec->next) {
1876             if (dec->scope == VTN_DEC_STRUCT_MEMBER_NAME0 - i) {
1877                name = dec->member_name;
1878                break;
1879             }
1880          }
1881          if (!name)
1882             name = ralloc_asprintf(b, "field%d", i);
1883 
1884          fields[i] = (struct glsl_struct_field) {
1885             .type = val->type->members[i]->type,
1886             .name = name,
1887             .location = -1,
1888             .offset = -1,
1889          };
1890       }
1891 
1892       vtn_foreach_decoration(b, val, struct_packed_decoration_cb, NULL);
1893 
1894       struct member_decoration_ctx ctx = {
1895          .num_fields = num_fields,
1896          .fields = fields,
1897          .type = val->type
1898       };
1899 
1900       vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
1901 
1902       /* Propagate access specifiers that are present on all members to the overall type */
1903       enum gl_access_qualifier overall_access = ACCESS_COHERENT | ACCESS_VOLATILE |
1904                                                 ACCESS_NON_READABLE | ACCESS_NON_WRITEABLE;
1905       for (unsigned i = 0; i < num_fields; ++i)
1906          overall_access &= val->type->members[i]->access;
1907       val->type->access = overall_access;
1908 
1909       vtn_foreach_decoration(b, val, struct_member_matrix_stride_cb, &ctx);
1910 
1911       vtn_foreach_decoration(b, val, struct_block_decoration_cb, NULL);
1912 
1913       const char *name = val->name;
1914 
1915       if (val->type->block || val->type->buffer_block) {
1916          /* Packing will be ignored since types coming from SPIR-V are
1917           * explicitly laid out.
1918           */
1919          val->type->type = glsl_interface_type(fields, num_fields,
1920                                                /* packing */ 0, false,
1921                                                name ? name : "block");
1922       } else {
1923          val->type->type = glsl_struct_type(fields, num_fields,
1924                                             name ? name : "struct",
1925                                             val->type->packed);
1926       }
1927       break;
1928    }
1929 
1930    case SpvOpTypeFunction: {
1931       val->type->base_type = vtn_base_type_function;
1932       val->type->type = NULL;
1933 
1934       val->type->return_type = vtn_get_type(b, w[2]);
1935 
1936       const unsigned num_params = count - 3;
1937       val->type->length = num_params;
1938       val->type->params = vtn_alloc_array(b, struct vtn_type *, num_params);
1939       for (unsigned i = 0; i < count - 3; i++) {
1940          val->type->params[i] = vtn_get_type(b, w[i + 3]);
1941       }
1942       break;
1943    }
1944 
1945    case SpvOpTypePointer:
1946    case SpvOpTypeForwardPointer: {
1947       /* We can't blindly push the value because it might be a forward
1948        * declaration.
1949        */
1950       val = vtn_untyped_value(b, w[1]);
1951 
1952       SpvStorageClass storage_class = w[2];
1953 
1954       vtn_fail_if(opcode == SpvOpTypeForwardPointer &&
1955                   b->shader->info.stage != MESA_SHADER_KERNEL &&
1956                   storage_class != SpvStorageClassPhysicalStorageBuffer,
1957                   "OpTypeForwardPointer is only allowed in Vulkan with "
1958                   "the PhysicalStorageBuffer storage class");
1959 
1960       struct vtn_type *pointed_type = NULL;
1961       if (opcode == SpvOpTypePointer)
1962          pointed_type = vtn_get_type(b, w[3]);
1963 
1964       bool has_forward_pointer = false;
1965       if (val->value_type == vtn_value_type_invalid) {
1966          val->value_type = vtn_value_type_type;
1967          val->type = vtn_zalloc(b, struct vtn_type);
1968          val->type->id = w[1];
1969          val->type->base_type = vtn_base_type_pointer;
1970          val->type->storage_class = storage_class;
1971 
1972          /* These can actually be stored to nir_variables and used as SSA
1973           * values so they need a real glsl_type.
1974           */
1975          enum vtn_variable_mode mode = vtn_storage_class_to_mode(
1976             b, storage_class, pointed_type, NULL);
1977 
1978          /* The deref type should only matter for the UniformConstant storage
1979           * class.  In particular, it should never matter for any storage
1980           * classes that are allowed in combination with OpTypeForwardPointer.
1981           */
1982          if (storage_class != SpvStorageClassUniform &&
1983              storage_class != SpvStorageClassUniformConstant) {
1984             assert(mode == vtn_storage_class_to_mode(b, storage_class,
1985                                                      NULL, NULL));
1986          }
1987 
1988          val->type->type = nir_address_format_to_glsl_type(
1989             vtn_mode_to_address_format(b, mode));
1990       } else {
1991          vtn_fail_if(val->type->storage_class != storage_class,
1992                      "The storage classes of an OpTypePointer and any "
1993                      "OpTypeForwardPointers that provide forward "
1994                      "declarations of it must match.");
1995          has_forward_pointer = true;
1996       }
1997 
1998       if (opcode == SpvOpTypePointer) {
1999          vtn_fail_if(val->type->pointed != NULL,
2000                      "While OpTypeForwardPointer can be used to provide a "
2001                      "forward declaration of a pointer, OpTypePointer can "
2002                      "only be used once for a given id.");
2003 
2004          vtn_fail_if(has_forward_pointer &&
2005                      pointed_type->base_type != vtn_base_type_struct,
2006                      "An OpTypePointer instruction must declare "
2007                      "Pointer Type to be a pointer to an OpTypeStruct.");
2008 
2009          val->type->pointed = pointed_type;
2010 
2011          /* Only certain storage classes use ArrayStride. */
2012          switch (storage_class) {
2013          case SpvStorageClassWorkgroup:
2014             if (!b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR)
2015                break;
2016             FALLTHROUGH;
2017 
2018          case SpvStorageClassUniform:
2019          case SpvStorageClassPushConstant:
2020          case SpvStorageClassStorageBuffer:
2021          case SpvStorageClassPhysicalStorageBuffer:
2022             vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
2023             break;
2024 
2025          default:
2026             /* Nothing to do. */
2027             break;
2028          }
2029       }
2030       break;
2031    }
2032 
2033    case SpvOpTypeImage: {
2034       val->type->base_type = vtn_base_type_image;
2035 
2036       /* Images are represented in NIR as a scalar SSA value that is the
2037        * result of a deref instruction.  An OpLoad on an OpTypeImage pointer
2038        * from UniformConstant memory just takes the NIR deref from the pointer
2039        * and turns it into an SSA value.
2040        */
2041       val->type->type = nir_address_format_to_glsl_type(
2042          vtn_mode_to_address_format(b, vtn_variable_mode_function));
2043 
2044       const struct vtn_type *sampled_type = vtn_get_type(b, w[2]);
2045       if (b->shader->info.stage == MESA_SHADER_KERNEL) {
2046          vtn_fail_if(sampled_type->base_type != vtn_base_type_void,
2047                      "Sampled type of OpTypeImage must be void for kernels");
2048       } else {
2049          vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar,
2050                      "Sampled type of OpTypeImage must be a scalar");
2051          if (b->supported_capabilities.Int64ImageEXT) {
2052             vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32 &&
2053                         glsl_get_bit_size(sampled_type->type) != 64,
2054                         "Sampled type of OpTypeImage must be a 32 or 64-bit "
2055                         "scalar");
2056          } else {
2057             vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32,
2058                         "Sampled type of OpTypeImage must be a 32-bit scalar");
2059          }
2060       }
2061 
2062       enum glsl_sampler_dim dim;
2063       switch ((SpvDim)w[3]) {
2064       case SpvDim1D:       dim = GLSL_SAMPLER_DIM_1D;    break;
2065       case SpvDim2D:       dim = GLSL_SAMPLER_DIM_2D;    break;
2066       case SpvDim3D:       dim = GLSL_SAMPLER_DIM_3D;    break;
2067       case SpvDimCube:     dim = GLSL_SAMPLER_DIM_CUBE;  break;
2068       case SpvDimRect:     dim = GLSL_SAMPLER_DIM_RECT;  break;
2069       case SpvDimBuffer:   dim = GLSL_SAMPLER_DIM_BUF;   break;
2070       case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break;
2071       default:
2072          vtn_fail("Invalid SPIR-V image dimensionality: %s (%u)",
2073                   spirv_dim_to_string((SpvDim)w[3]), w[3]);
2074       }
2075 
2076       /* w[4]: as per Vulkan spec "Validation Rules within a Module",
2077        *       The “Depth” operand of OpTypeImage is ignored.
2078        */
2079       bool is_array = w[5];
2080       bool multisampled = w[6];
2081       unsigned sampled = w[7];
2082       SpvImageFormat format = w[8];
2083 
2084       if (count > 9)
2085          val->type->access_qualifier = w[9];
2086       else if (b->shader->info.stage == MESA_SHADER_KERNEL)
2087          /* Per the CL C spec: If no qualifier is provided, read_only is assumed. */
2088          val->type->access_qualifier = SpvAccessQualifierReadOnly;
2089       else
2090          val->type->access_qualifier = SpvAccessQualifierReadWrite;
2091 
2092       if (multisampled) {
2093          if (dim == GLSL_SAMPLER_DIM_2D)
2094             dim = GLSL_SAMPLER_DIM_MS;
2095          else if (dim == GLSL_SAMPLER_DIM_SUBPASS)
2096             dim = GLSL_SAMPLER_DIM_SUBPASS_MS;
2097          else
2098             vtn_fail("Unsupported multisampled image type");
2099       }
2100 
2101       val->type->image_format = translate_image_format(b, format);
2102 
2103       enum glsl_base_type sampled_base_type =
2104          glsl_get_base_type(sampled_type->type);
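           /* Per OpTypeImage, Sampled == 1 means the image will be used with a
            * sampler and Sampled == 2 means it will be used without one (a
            * storage image).  Sampled == 0 (only known at run time) is only
            * allowed for kernels.
            */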
2105       if (sampled == 1) {
2106          val->type->glsl_image = glsl_texture_type(dim, is_array,
2107                                                    sampled_base_type);
2108       } else if (sampled == 2) {
2109          val->type->glsl_image = glsl_image_type(dim, is_array,
2110                                                  sampled_base_type);
2111       } else if (b->shader->info.stage == MESA_SHADER_KERNEL) {
2112          val->type->glsl_image = glsl_image_type(dim, is_array,
2113                                                  GLSL_TYPE_VOID);
2114       } else {
2115          vtn_fail("We need to know if the image will be sampled");
2116       }
2117       break;
2118    }
2119 
2120    case SpvOpTypeSampledImage: {
2121       val->type->base_type = vtn_base_type_sampled_image;
2122       val->type->image = vtn_get_type(b, w[2]);
2123 
2124       validate_image_type_for_sampled_image(
2125          b, val->type->image->glsl_image,
2126          "Image Type operand of OpTypeSampledImage");
2127 
2128       /* Sampled images are represented in NIR as a vec2 SSA value where each
2129        * component is the result of a deref instruction.  The first component
2130        * is the image and the second is the sampler.  An OpLoad on an
2131        * OpTypeSampledImage pointer from UniformConstant memory just takes
2132        * the NIR deref from the pointer and duplicates it to both vector
2133        * components.
2134        */
2135       nir_address_format addr_format =
2136          vtn_mode_to_address_format(b, vtn_variable_mode_function);
2137       assert(nir_address_format_num_components(addr_format) == 1);
2138       unsigned bit_size = nir_address_format_bit_size(addr_format);
2139       assert(bit_size == 32 || bit_size == 64);
2140 
2141       enum glsl_base_type base_type =
2142          bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64;
2143       val->type->type = glsl_vector_type(base_type, 2);
2144       break;
2145    }
2146 
2147    case SpvOpTypeSampler:
2148       val->type->base_type = vtn_base_type_sampler;
2149 
2150       /* Samplers are represented in NIR as a scalar SSA value that is the
2151        * result of a deref instruction.  An OpLoad on an OpTypeSampler pointer
2152        * from UniformConstant memory just takes the NIR deref from the pointer
2153        * and turns it into an SSA value.
2154        */
2155       val->type->type = nir_address_format_to_glsl_type(
2156          vtn_mode_to_address_format(b, vtn_variable_mode_function));
2157       break;
2158 
2159    case SpvOpTypeAccelerationStructureKHR:
2160       val->type->base_type = vtn_base_type_accel_struct;
2161       val->type->type = glsl_uint64_t_type();
2162       break;
2163 
2164 
2165    case SpvOpTypeOpaque: {
2166       val->type->base_type = vtn_base_type_struct;
2167       const char *name = vtn_string_literal(b, &w[2], count - 2, NULL);
2168       val->type->type = glsl_struct_type(NULL, 0, name, false);
2169       break;
2170    }
2171 
2172    case SpvOpTypeRayQueryKHR: {
2173       val->type->base_type = vtn_base_type_ray_query;
2174       val->type->type = glsl_uint64_t_type();
2175       /* We may need to run queries on helper invocations.  The parser does not
2176        * do a deeper analysis of whether the result of a query will be used in
2177        * derivative instructions.
2178        *
2179        * An implementation willing to optimize this would look through the IR
2180        * and check if any derivative instruction uses the result of a query
2181        * and drop this flag if not.
2182        */
2183       if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
2184          val->type->access = ACCESS_INCLUDE_HELPERS;
2185       break;
2186    }
2187 
2188    case SpvOpTypeCooperativeMatrixKHR:
2189       vtn_handle_cooperative_type(b, val, opcode, w, count);
2190       break;
2191 
2192    case SpvOpTypeEvent:
2193       val->type->base_type = vtn_base_type_event;
2194       /*
2195        * This makes the event type compatible with the pointer size, to work around LLVM 16.
2196        * LLVM 17 fixes this properly, but with LLVM 16 and opaque pointers it is still wrong.
2197        */
2198       val->type->type = b->shader->info.cs.ptr_size == 64 ? glsl_int64_t_type() : glsl_int_type();
2199       break;
2200 
2201    case SpvOpTypeDeviceEvent:
2202    case SpvOpTypeReserveId:
2203    case SpvOpTypeQueue:
2204    case SpvOpTypePipe:
2205    default:
2206       vtn_fail_with_opcode("Unhandled opcode", opcode);
2207    }
2208 
2209    vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
2210 
2211    if (val->type->base_type == vtn_base_type_struct &&
2212        (val->type->block || val->type->buffer_block)) {
2213       for (unsigned i = 0; i < val->type->length; i++) {
2214          vtn_fail_if(vtn_type_contains_block(b, val->type->members[i]),
2215                      "Block and BufferBlock decorations cannot decorate a "
2216                      "structure type that is nested at any level inside "
2217                      "another structure type decorated with Block or "
2218                      "BufferBlock.");
2219       }
2220    }
2221 }
2222 
2223 static nir_constant *
2224 vtn_null_constant(struct vtn_builder *b, struct vtn_type *type)
2225 {
2226    nir_constant *c = rzalloc(b, nir_constant);
2227 
2228    switch (type->base_type) {
2229    case vtn_base_type_scalar:
2230    case vtn_base_type_vector:
2231       c->is_null_constant = true;
2232       /* Nothing to do here.  It's already initialized to zero */
2233       break;
2234 
2235    case vtn_base_type_pointer: {
2236       enum vtn_variable_mode mode = vtn_storage_class_to_mode(
2237          b, type->storage_class, type->pointed, NULL);
2238       nir_address_format addr_format = vtn_mode_to_address_format(b, mode);
2239 
2240       const nir_const_value *null_value = nir_address_format_null_value(addr_format);
2241       memcpy(c->values, null_value,
2242              sizeof(nir_const_value) * nir_address_format_num_components(addr_format));
2243       break;
2244    }
2245 
2246    case vtn_base_type_void:
2247    case vtn_base_type_image:
2248    case vtn_base_type_sampler:
2249    case vtn_base_type_sampled_image:
2250    case vtn_base_type_function:
2251    case vtn_base_type_event:
2252       /* For those we have to return something but it doesn't matter what. */
2253       break;
2254 
2255    case vtn_base_type_matrix:
2256    case vtn_base_type_array:
2257       vtn_assert(type->length > 0);
2258       c->is_null_constant = true;
2259       c->num_elements = type->length;
2260       c->elements = ralloc_array(b, nir_constant *, c->num_elements);
2261 
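           /* Every element of a null composite is itself null, so build a single
            * null element and reuse it for all slots.
            */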
2262       c->elements[0] = vtn_null_constant(b, type->array_element);
2263       for (unsigned i = 1; i < c->num_elements; i++)
2264          c->elements[i] = c->elements[0];
2265       break;
2266 
2267    case vtn_base_type_struct:
2268       c->is_null_constant = true;
2269       c->num_elements = type->length;
2270       c->elements = ralloc_array(b, nir_constant *, c->num_elements);
2271       for (unsigned i = 0; i < c->num_elements; i++)
2272          c->elements[i] = vtn_null_constant(b, type->members[i]);
2273       break;
2274 
2275    default:
2276       vtn_fail("Invalid type for null constant");
2277    }
2278 
2279    return c;
2280 }
2281 
2282 static void
2283 spec_constant_decoration_cb(struct vtn_builder *b, UNUSED struct vtn_value *val,
2284                             ASSERTED int member,
2285                             const struct vtn_decoration *dec, void *data)
2286 {
2287    vtn_assert(member == -1);
2288    if (dec->decoration != SpvDecorationSpecId)
2289       return;
2290 
2291    nir_const_value *value = data;
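        /* Look for an API-provided specialization matching this SpecId and, if
         * one exists, use it to override the default constant value.
         */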
2292    for (unsigned i = 0; i < b->num_specializations; i++) {
2293       if (b->specializations[i].id == dec->operands[0]) {
2294          *value = b->specializations[i].value;
2295          return;
2296       }
2297    }
2298 }
2299 
2300 static void
2301 handle_workgroup_size_decoration_cb(struct vtn_builder *b,
2302                                     struct vtn_value *val,
2303                                     ASSERTED int member,
2304                                     const struct vtn_decoration *dec,
2305                                     UNUSED void *data)
2306 {
2307    vtn_assert(member == -1);
2308    if (dec->decoration != SpvDecorationBuiltIn ||
2309        dec->operands[0] != SpvBuiltInWorkgroupSize)
2310       return;
2311 
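        /* A constant decorated as the WorkgroupSize built-in takes precedence
         * over the LocalSize execution mode, so stash it here for later use.
         */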
2312    vtn_assert(val->type->type == glsl_vector_type(GLSL_TYPE_UINT, 3));
2313    b->workgroup_size_builtin = val;
2314 }
2315 
2316 static void
2317 vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
2318                     const uint32_t *w, unsigned count)
2319 {
2320    struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
2321    val->constant = rzalloc(b, nir_constant);
2322    switch (opcode) {
2323    case SpvOpConstantTrue:
2324    case SpvOpConstantFalse:
2325    case SpvOpSpecConstantTrue:
2326    case SpvOpSpecConstantFalse: {
2327       vtn_fail_if(val->type->type != glsl_bool_type(),
2328                   "Result type of %s must be OpTypeBool",
2329                   spirv_op_to_string(opcode));
2330 
2331       bool bval = (opcode == SpvOpConstantTrue ||
2332                    opcode == SpvOpSpecConstantTrue);
2333 
2334       nir_const_value u32val = nir_const_value_for_uint(bval, 32);
2335 
2336       if (opcode == SpvOpSpecConstantTrue ||
2337           opcode == SpvOpSpecConstantFalse)
2338          vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32val);
2339 
2340       val->constant->values[0].b = u32val.u32 != 0;
2341       break;
2342    }
2343 
2344    case SpvOpConstant:
2345    case SpvOpSpecConstant: {
2346       vtn_fail_if(val->type->base_type != vtn_base_type_scalar,
2347                   "Result type of %s must be a scalar",
2348                   spirv_op_to_string(opcode));
2349       int bit_size = glsl_get_bit_size(val->type->type);
2350       switch (bit_size) {
2351       case 64:
2352          val->constant->values[0].u64 = vtn_u64_literal(&w[3]);
2353          break;
2354       case 32:
2355          val->constant->values[0].u32 = w[3];
2356          break;
2357       case 16:
2358          val->constant->values[0].u16 = w[3];
2359          break;
2360       case 8:
2361          val->constant->values[0].u8 = w[3];
2362          break;
2363       default:
2364          vtn_fail("Unsupported SpvOpConstant bit size: %u", bit_size);
2365       }
2366 
2367       if (opcode == SpvOpSpecConstant)
2368          vtn_foreach_decoration(b, val, spec_constant_decoration_cb,
2369                                 &val->constant->values[0]);
2370       break;
2371    }
2372 
2373    case SpvOpSpecConstantComposite:
2374    case SpvOpConstantComposite:
2375    case SpvOpConstantCompositeReplicateEXT:
2376    case SpvOpSpecConstantCompositeReplicateEXT: {
2377       const unsigned elem_count =
2378          val->type->base_type == vtn_base_type_cooperative_matrix ?
2379          1 : val->type->length;
2380 
2381       nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
2382       if (opcode == SpvOpConstantCompositeReplicateEXT ||
2383           opcode == SpvOpSpecConstantCompositeReplicateEXT) {
2384          struct vtn_value *elem_val = vtn_untyped_value(b, w[3]);
2385 
2386          if (elem_val->value_type == vtn_value_type_constant) {
2387             elems[0] = elem_val->constant;
2388             val->is_undef_constant = false;
2389          } else {
2390             vtn_fail_if(elem_val->value_type != vtn_value_type_undef,
2391                         "only constants or undefs allowed for %s",
2392                         spirv_op_to_string(opcode));
2393             /* to make it easier, just insert a NULL constant for now */
2394             elems[0] = vtn_null_constant(b, elem_val->type);
2395             val->is_undef_constant = true;
2396          }
2397 
2398          for (unsigned i = 1; i < elem_count; i++)
2399             elems[i] = elems[0];
2400       } else {
2401          vtn_fail_if(elem_count != count - 3,
2402                      "%s has %u constituents, expected %u",
2403                      spirv_op_to_string(opcode), count - 3, elem_count);
2404 
2405          val->is_undef_constant = true;
2406          for (unsigned i = 0; i < elem_count; i++) {
2407             struct vtn_value *elem_val = vtn_untyped_value(b, w[i + 3]);
2408 
2409             if (elem_val->value_type == vtn_value_type_constant) {
2410                elems[i] = elem_val->constant;
2411                val->is_undef_constant = val->is_undef_constant &&
2412                                         elem_val->is_undef_constant;
2413             } else {
2414                vtn_fail_if(elem_val->value_type != vtn_value_type_undef,
2415                            "only constants or undefs allowed for %s",
2416                            spirv_op_to_string(opcode));
2417                /* to make it easier, just insert a NULL constant for now */
2418                elems[i] = vtn_null_constant(b, elem_val->type);
2419             }
2420          }
2421       }
2422 
2423       switch (val->type->base_type) {
2424       case vtn_base_type_vector: {
2425          assert(glsl_type_is_vector(val->type->type));
2426          for (unsigned i = 0; i < elem_count; i++)
2427             val->constant->values[i] = elems[i]->values[0];
2428          break;
2429       }
2430 
2431       case vtn_base_type_matrix:
2432       case vtn_base_type_struct:
2433       case vtn_base_type_array:
2434          ralloc_steal(val->constant, elems);
2435          val->constant->num_elements = elem_count;
2436          val->constant->elements = elems;
2437          break;
2438 
2439       case vtn_base_type_cooperative_matrix:
2440          val->constant->values[0] = elems[0]->values[0];
2441          break;
2442 
2443       default:
2444          vtn_fail("Result type of %s must be a composite type",
2445                   spirv_op_to_string(opcode));
2446       }
2447       break;
2448    }
2449 
2450    case SpvOpSpecConstantOp: {
2451       nir_const_value u32op = nir_const_value_for_uint(w[3], 32);
2452       vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32op);
2453       SpvOp opcode = u32op.u32;
2454       switch (opcode) {
2455       case SpvOpVectorShuffle: {
2456          struct vtn_value *v0 = &b->values[w[4]];
2457          struct vtn_value *v1 = &b->values[w[5]];
2458 
2459          vtn_assert(v0->value_type == vtn_value_type_constant ||
2460                     v0->value_type == vtn_value_type_undef);
2461          vtn_assert(v1->value_type == vtn_value_type_constant ||
2462                     v1->value_type == vtn_value_type_undef);
2463 
2464          unsigned len0 = glsl_get_vector_elements(v0->type->type);
2465          unsigned len1 = glsl_get_vector_elements(v1->type->type);
2466 
2467          vtn_assert(len0 + len1 < 16);
2468 
2469          unsigned bit_size = glsl_get_bit_size(val->type->type);
2470          unsigned bit_size0 = glsl_get_bit_size(v0->type->type);
2471          unsigned bit_size1 = glsl_get_bit_size(v1->type->type);
2472 
2473          vtn_assert(bit_size == bit_size0 && bit_size == bit_size1);
2474          (void)bit_size0; (void)bit_size1;
2475 
2476          nir_const_value undef = { .u64 = 0xdeadbeefdeadbeef };
2477          nir_const_value combined[NIR_MAX_VEC_COMPONENTS * 2];
2478 
2479          if (v0->value_type == vtn_value_type_constant) {
2480             for (unsigned i = 0; i < len0; i++)
2481                combined[i] = v0->constant->values[i];
2482          }
2483          if (v1->value_type == vtn_value_type_constant) {
2484             for (unsigned i = 0; i < len1; i++)
2485                combined[len0 + i] = v1->constant->values[i];
2486          }
2487 
2488          for (unsigned i = 0, j = 0; i < count - 6; i++, j++) {
2489             uint32_t comp = w[i + 6];
2490             if (comp == (uint32_t)-1) {
2491                /* If component is not used, set the value to a known constant
2492                 * to detect if it is wrongly used.
2493                 */
2494                val->constant->values[j] = undef;
2495             } else {
2496                vtn_fail_if(comp >= len0 + len1,
2497                            "All Component literals must either be FFFFFFFF "
2498                            "or in [0, N - 1] (inclusive).");
2499                val->constant->values[j] = combined[comp];
2500             }
2501          }
2502          break;
2503       }
2504 
2505       case SpvOpCompositeExtract:
2506       case SpvOpCompositeInsert: {
2507          struct vtn_value *comp;
2508          unsigned deref_start;
2509          struct nir_constant **c;
2510          if (opcode == SpvOpCompositeExtract) {
2511             comp = vtn_value(b, w[4], vtn_value_type_constant);
2512             deref_start = 5;
2513             c = &comp->constant;
2514          } else {
2515             comp = vtn_value(b, w[5], vtn_value_type_constant);
2516             deref_start = 6;
2517             val->constant = nir_constant_clone(comp->constant,
2518                                                (nir_variable *)b);
2519             c = &val->constant;
2520          }
2521 
2522          int elem = -1;
2523          const struct vtn_type *type = comp->type;
2524          for (unsigned i = deref_start; i < count; i++) {
2525             if (type->base_type == vtn_base_type_cooperative_matrix) {
2526                /* Cooperative matrices are always scalar constants.  We don't
2527                 * care about the index w[i] because it's always replicated.
2528                 */
2529                type = type->component_type;
2530             } else {
2531                vtn_fail_if(w[i] > type->length,
2532                            "%uth index of %s is %u but the type has only "
2533                            "%u elements", i - deref_start,
2534                            spirv_op_to_string(opcode), w[i], type->length);
2535 
2536                switch (type->base_type) {
2537                case vtn_base_type_vector:
2538                   elem = w[i];
2539                   type = type->array_element;
2540                   break;
2541 
2542                case vtn_base_type_matrix:
2543                case vtn_base_type_array:
2544                   c = &(*c)->elements[w[i]];
2545                   type = type->array_element;
2546                   break;
2547 
2548                case vtn_base_type_struct:
2549                   c = &(*c)->elements[w[i]];
2550                   type = type->members[w[i]];
2551                   break;
2552 
2553                default:
2554                   vtn_fail("%s must only index into composite types",
2555                            spirv_op_to_string(opcode));
2556                }
2557             }
2558          }
2559 
2560          if (opcode == SpvOpCompositeExtract) {
2561             if (elem == -1) {
2562                val->constant = *c;
2563             } else {
2564                unsigned num_components = type->length;
2565                for (unsigned i = 0; i < num_components; i++)
2566                   val->constant->values[i] = (*c)->values[elem + i];
2567             }
2568          } else {
2569             struct vtn_value *insert =
2570                vtn_value(b, w[4], vtn_value_type_constant);
2571             vtn_assert(insert->type == type);
2572             if (elem == -1) {
2573                *c = insert->constant;
2574             } else {
2575                unsigned num_components = type->length;
2576                for (unsigned i = 0; i < num_components; i++)
2577                   (*c)->values[elem + i] = insert->constant->values[i];
2578             }
2579          }
2580          break;
2581       }
2582 
2583       default: {
2584          bool swap;
2585          nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(val->type->type);
2586          nir_alu_type src_alu_type = dst_alu_type;
2587          unsigned num_components = glsl_get_vector_elements(val->type->type);
2588          unsigned bit_size;
2589 
2590          vtn_assert(count <= 7);
2591 
2592          switch (opcode) {
2593          case SpvOpSConvert:
2594          case SpvOpFConvert:
2595          case SpvOpUConvert:
2596             /* We have a source in a conversion */
2597             src_alu_type =
2598                nir_get_nir_type_for_glsl_type(vtn_get_value_type(b, w[4])->type);
2599             /* We use the bitsize of the conversion source to evaluate the opcode later */
2600             bit_size = glsl_get_bit_size(vtn_get_value_type(b, w[4])->type);
2601             break;
2602          default:
2603             bit_size = glsl_get_bit_size(val->type->type);
2604          };
2605 
2606          bool exact;
2607          nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact,
2608                                                      nir_alu_type_get_type_size(src_alu_type),
2609                                                      nir_alu_type_get_type_size(dst_alu_type));
2610 
2611          /* No SPIR-V opcodes handled through this path should set exact.
2612           * Since it is ignored, assert on it.
2613           */
2614          assert(!exact);
2615 
2616          nir_const_value src[3][NIR_MAX_VEC_COMPONENTS];
2617 
2618          for (unsigned i = 0; i < count - 4; i++) {
2619             struct vtn_value *src_val =
2620                vtn_value(b, w[4 + i], vtn_value_type_constant);
2621 
2622             /* If this is an unsized source, pull the bit size from the
2623              * source; otherwise, we'll use the bit size from the destination.
2624              */
2625             if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]))
2626                bit_size = glsl_get_bit_size(src_val->type->type);
2627 
2628             unsigned src_comps = nir_op_infos[op].input_sizes[i] ?
2629                                  nir_op_infos[op].input_sizes[i] :
2630                                  num_components;
2631 
2632             unsigned j = swap ? 1 - i : i;
2633             for (unsigned c = 0; c < src_comps; c++)
2634                src[j][c] = src_val->constant->values[c];
2635          }
2636 
2637          /* fix up fixed size sources */
2638          switch (op) {
2639          case nir_op_ishl:
2640          case nir_op_ishr:
2641          case nir_op_ushr: {
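                 /* NIR shift opcodes always take a 32-bit shift count, so move
                  * the second source into a 32-bit value when the shifted value
                  * has a different bit size.
                  */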
2642             if (bit_size == 32)
2643                break;
2644             for (unsigned i = 0; i < num_components; ++i) {
2645                switch (bit_size) {
2646                case 64: src[1][i].u32 = src[1][i].u64; break;
2647                case 16: src[1][i].u32 = src[1][i].u16; break;
2648                case  8: src[1][i].u32 = src[1][i].u8;  break;
2649                }
2650             }
2651             break;
2652          }
2653          default:
2654             break;
2655          }
2656 
2657          nir_const_value *srcs[3] = {
2658             src[0], src[1], src[2],
2659          };
2660          nir_eval_const_opcode(op, val->constant->values,
2661                                num_components, bit_size, srcs,
2662                                b->shader->info.float_controls_execution_mode);
2663          break;
2664       } /* default */
2665       }
2666       break;
2667    }
2668 
2669    case SpvOpConstantNull:
2670       val->constant = vtn_null_constant(b, val->type);
2671       val->is_null_constant = true;
2672       break;
2673 
2674    default:
2675       vtn_fail_with_opcode("Unhandled opcode", opcode);
2676    }
2677 
2678    /* Now that we have the value, update the workgroup size if needed */
2679    if (gl_shader_stage_uses_workgroup(b->entry_point_stage))
2680       vtn_foreach_decoration(b, val, handle_workgroup_size_decoration_cb,
2681                              NULL);
2682 }
2683 
2684 static void
2685 vtn_split_barrier_semantics(struct vtn_builder *b,
2686                             SpvMemorySemanticsMask semantics,
2687                             SpvMemorySemanticsMask *before,
2688                             SpvMemorySemanticsMask *after)
2689 {
2690    /* For memory semantics embedded in operations, we split them into up to
2691     * two barriers, to be added before and after the operation.  This is less
2692     * strict than if we propagated it all the way to the final backend stage,
2693     * but still results in correct execution.
2694     *
2695     * A further improvement would be to pipe this information (and use it!) into the
2696     * next compiler layers, at the expense of making the handling of barriers
2697     * more complicated.
2698     */
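        /* For example (assuming AcquireRelease | WorkgroupMemory semantics on an
         * atomic), this results in a Release | WorkgroupMemory barrier before the
         * operation and an Acquire | WorkgroupMemory barrier after it.
         */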
2699 
2700    *before = SpvMemorySemanticsMaskNone;
2701    *after = SpvMemorySemanticsMaskNone;
2702 
2703    SpvMemorySemanticsMask order_semantics =
2704       semantics & (SpvMemorySemanticsAcquireMask |
2705                    SpvMemorySemanticsReleaseMask |
2706                    SpvMemorySemanticsAcquireReleaseMask |
2707                    SpvMemorySemanticsSequentiallyConsistentMask);
2708 
2709    if (util_bitcount(order_semantics) > 1) {
2710       /* Old GLSLang versions incorrectly set all the ordering bits.  This was
2711        * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2712        * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2713        */
2714       vtn_warn("Multiple memory ordering semantics specified, "
2715                "assuming AcquireRelease.");
2716       order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2717    }
2718 
2719    const SpvMemorySemanticsMask av_vis_semantics =
2720       semantics & (SpvMemorySemanticsMakeAvailableMask |
2721                    SpvMemorySemanticsMakeVisibleMask);
2722 
2723    const SpvMemorySemanticsMask storage_semantics =
2724       semantics & (SpvMemorySemanticsUniformMemoryMask |
2725                    SpvMemorySemanticsSubgroupMemoryMask |
2726                    SpvMemorySemanticsWorkgroupMemoryMask |
2727                    SpvMemorySemanticsCrossWorkgroupMemoryMask |
2728                    SpvMemorySemanticsAtomicCounterMemoryMask |
2729                    SpvMemorySemanticsImageMemoryMask |
2730                    SpvMemorySemanticsOutputMemoryMask);
2731 
2732    const SpvMemorySemanticsMask other_semantics =
2733       semantics & ~(order_semantics | av_vis_semantics | storage_semantics |
2734                     SpvMemorySemanticsVolatileMask);
2735 
2736    if (other_semantics)
2737       vtn_warn("Ignoring unhandled memory semantics: %u\n", other_semantics);
2738 
2739    /* SequentiallyConsistent is treated as AcquireRelease. */
2740 
2741    /* The RELEASE barrier happens BEFORE the operation, and it is usually
2742     * associated with a Store.  All the write operations with matching
2743     * semantics will not be reordered after the Store.
2744     */
2745    if (order_semantics & (SpvMemorySemanticsReleaseMask |
2746                           SpvMemorySemanticsAcquireReleaseMask |
2747                           SpvMemorySemanticsSequentiallyConsistentMask)) {
2748       *before |= SpvMemorySemanticsReleaseMask | storage_semantics;
2749    }
2750 
2751    /* The ACQUIRE barrier happens AFTER the operation, and it is usually
2752     * associated with a Load.  All the operations with matching semantics
2753     * will not be reordered before the Load.
2754     */
2755    if (order_semantics & (SpvMemorySemanticsAcquireMask |
2756                           SpvMemorySemanticsAcquireReleaseMask |
2757                           SpvMemorySemanticsSequentiallyConsistentMask)) {
2758       *after |= SpvMemorySemanticsAcquireMask | storage_semantics;
2759    }
2760 
2761    if (av_vis_semantics & SpvMemorySemanticsMakeVisibleMask)
2762       *before |= SpvMemorySemanticsMakeVisibleMask | storage_semantics;
2763 
2764    if (av_vis_semantics & SpvMemorySemanticsMakeAvailableMask)
2765       *after |= SpvMemorySemanticsMakeAvailableMask | storage_semantics;
2766 }
2767 
2768 static nir_memory_semantics
2769 vtn_mem_semantics_to_nir_mem_semantics(struct vtn_builder *b,
2770                                        SpvMemorySemanticsMask semantics)
2771 {
2772    nir_memory_semantics nir_semantics = 0;
2773 
2774    SpvMemorySemanticsMask order_semantics =
2775       semantics & (SpvMemorySemanticsAcquireMask |
2776                    SpvMemorySemanticsReleaseMask |
2777                    SpvMemorySemanticsAcquireReleaseMask |
2778                    SpvMemorySemanticsSequentiallyConsistentMask);
2779 
2780    if (util_bitcount(order_semantics) > 1) {
2781       /* Old GLSLang versions incorrectly set all the ordering bits.  This was
2782        * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2783        * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2784        */
2785       vtn_warn("Multiple memory ordering semantics bits specified, "
2786                "assuming AcquireRelease.");
2787       order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2788    }
2789 
2790    switch (order_semantics) {
2791    case 0:
2792       /* Not an ordering barrier. */
2793       break;
2794 
2795    case SpvMemorySemanticsAcquireMask:
2796       nir_semantics = NIR_MEMORY_ACQUIRE;
2797       break;
2798 
2799    case SpvMemorySemanticsReleaseMask:
2800       nir_semantics = NIR_MEMORY_RELEASE;
2801       break;
2802 
2803    case SpvMemorySemanticsSequentiallyConsistentMask:
2804       FALLTHROUGH; /* Treated as AcquireRelease in Vulkan. */
2805    case SpvMemorySemanticsAcquireReleaseMask:
2806       nir_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE;
2807       break;
2808 
2809    default:
2810       unreachable("Invalid memory order semantics");
2811    }
2812 
2813    if (semantics & SpvMemorySemanticsMakeAvailableMask) {
2814       vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
2815                   "To use MakeAvailable memory semantics the VulkanMemoryModel "
2816                   "capability must be declared.");
2817       nir_semantics |= NIR_MEMORY_MAKE_AVAILABLE;
2818    }
2819 
2820    if (semantics & SpvMemorySemanticsMakeVisibleMask) {
2821       vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
2822                   "To use MakeVisible memory semantics the VulkanMemoryModel "
2823                   "capability must be declared.");
2824       nir_semantics |= NIR_MEMORY_MAKE_VISIBLE;
2825    }
2826 
2827    return nir_semantics;
2828 }
2829 
2830 static nir_variable_mode
2831 vtn_mem_semantics_to_nir_var_modes(struct vtn_builder *b,
2832                                    SpvMemorySemanticsMask semantics)
2833 {
2834    /* Vulkan Environment for SPIR-V says "SubgroupMemory, CrossWorkgroupMemory,
2835     * and AtomicCounterMemory are ignored".
2836     */
2837    if (b->options->environment == NIR_SPIRV_VULKAN) {
2838       semantics &= ~(SpvMemorySemanticsSubgroupMemoryMask |
2839                      SpvMemorySemanticsCrossWorkgroupMemoryMask |
2840                      SpvMemorySemanticsAtomicCounterMemoryMask);
2841    }
2842 
2843    nir_variable_mode modes = 0;
2844    if (semantics & SpvMemorySemanticsUniformMemoryMask)
2845       modes |= nir_var_mem_ssbo | nir_var_mem_global;
2846    if (semantics & SpvMemorySemanticsImageMemoryMask)
2847       modes |= nir_var_image;
2848    if (semantics & SpvMemorySemanticsWorkgroupMemoryMask)
2849       modes |= nir_var_mem_shared;
2850    if (semantics & SpvMemorySemanticsCrossWorkgroupMemoryMask)
2851       modes |= nir_var_mem_global;
2852    if (semantics & SpvMemorySemanticsOutputMemoryMask) {
2853       modes |= nir_var_shader_out;
2854 
2855       if (b->shader->info.stage == MESA_SHADER_TASK)
2856          modes |= nir_var_mem_task_payload;
2857    }
2858 
2859    if (semantics & SpvMemorySemanticsAtomicCounterMemoryMask) {
2860       /* There's no nir_var_atomic_counter, but since atomic counters are
2861        * lowered to SSBOs, we use nir_var_mem_ssbo instead.
2862        */
2863       modes |= nir_var_mem_ssbo;
2864    }
2865 
2866    return modes;
2867 }
2868 
2869 mesa_scope
2870 vtn_translate_scope(struct vtn_builder *b, SpvScope scope)
2871 {
2872    switch (scope) {
2873    case SpvScopeDevice:
2874       vtn_fail_if(b->supported_capabilities.VulkanMemoryModel &&
2875                   !b->supported_capabilities.VulkanMemoryModelDeviceScope,
2876                   "If the Vulkan memory model is declared and any instruction "
2877                   "uses Device scope, the VulkanMemoryModelDeviceScope "
2878                   "capability must be declared.");
2879       return SCOPE_DEVICE;
2880 
2881    case SpvScopeQueueFamily:
2882       vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
2883                   "To use Queue Family scope, the VulkanMemoryModel capability "
2884                   "must be declared.");
2885       return SCOPE_QUEUE_FAMILY;
2886 
2887    case SpvScopeWorkgroup:
2888       return SCOPE_WORKGROUP;
2889 
2890    case SpvScopeSubgroup:
2891       return SCOPE_SUBGROUP;
2892 
2893    case SpvScopeInvocation:
2894       return SCOPE_INVOCATION;
2895 
2896    case SpvScopeShaderCallKHR:
2897       return SCOPE_SHADER_CALL;
2898 
2899    default:
2900       vtn_fail("Invalid memory scope");
2901    }
2902 }
2903 
2904 static void
2905 vtn_emit_scoped_control_barrier(struct vtn_builder *b, SpvScope exec_scope,
2906                                 SpvScope mem_scope,
2907                                 SpvMemorySemanticsMask semantics)
2908 {
2909    nir_memory_semantics nir_semantics =
2910       vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2911    nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2912    mesa_scope nir_exec_scope = vtn_translate_scope(b, exec_scope);
2913 
2914    /* Memory semantics is optional for OpControlBarrier. */
2915    mesa_scope nir_mem_scope;
2916    if (nir_semantics == 0 || modes == 0)
2917       nir_mem_scope = SCOPE_NONE;
2918    else
2919       nir_mem_scope = vtn_translate_scope(b, mem_scope);
2920 
2921    nir_barrier(&b->nb, .execution_scope=nir_exec_scope, .memory_scope=nir_mem_scope,
2922                        .memory_semantics=nir_semantics, .memory_modes=modes);
2923 }
2924 
2925 void
2926 vtn_emit_memory_barrier(struct vtn_builder *b, SpvScope scope,
2927                         SpvMemorySemanticsMask semantics)
2928 {
2929    nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2930    nir_memory_semantics nir_semantics =
2931       vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2932 
2933    /* No barrier to add. */
2934    if (nir_semantics == 0 || modes == 0)
2935       return;
2936 
2937    nir_barrier(&b->nb, .memory_scope=vtn_translate_scope(b, scope),
2938                        .memory_semantics=nir_semantics,
2939                        .memory_modes=modes);
2940 }
2941 
2942 struct vtn_ssa_value *
2943 vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
2944 {
2945    /* Always use bare types for SSA values for a couple of reasons:
2946     *
2947     *  1. Code which emits deref chains should never listen to the explicit
2948     *     layout information on the SSA value if any exists.  If we've
2949     *     accidentally been relying on this, we want to find those bugs.
2950     *
2951     *  2. We want to be able to quickly check that an SSA value being assigned
2952     *     to a SPIR-V value has the right type.  Using bare types everywhere
2953     *     ensures that we can pointer-compare.
2954     */
2955    struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
2956    val->type = glsl_get_bare_type(type);
2957 
2958 
2959    if (!glsl_type_is_vector_or_scalar(type)) {
2960       unsigned elems = glsl_get_length(val->type);
2961       val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
2962       if (glsl_type_is_array_or_matrix(type) || glsl_type_is_cmat(type)) {
2963          const struct glsl_type *elem_type = glsl_get_array_element(type);
2964          for (unsigned i = 0; i < elems; i++)
2965             val->elems[i] = vtn_create_ssa_value(b, elem_type);
2966       } else {
2967          vtn_assert(glsl_type_is_struct_or_ifc(type));
2968          for (unsigned i = 0; i < elems; i++) {
2969             const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
2970             val->elems[i] = vtn_create_ssa_value(b, elem_type);
2971          }
2972       }
2973    }
2974 
2975    return val;
2976 }
2977 
2978 void
2979 vtn_set_ssa_value_var(struct vtn_builder *b, struct vtn_ssa_value *ssa, nir_variable *var)
2980 {
2981    vtn_assert(glsl_type_is_cmat(var->type));
2982    vtn_assert(var->type == ssa->type);
2983    ssa->is_variable = true;
2984    ssa->var = var;
2985 }
2986 
2987 static nir_tex_src
2988 vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
2989 {
2990    return nir_tex_src_for_ssa(type, vtn_get_nir_ssa(b, index));
2991 }
2992 
2993 static uint32_t
2994 image_operand_arg(struct vtn_builder *b, const uint32_t *w, uint32_t count,
2995                   uint32_t mask_idx, SpvImageOperandsMask op)
2996 {
2997    static const SpvImageOperandsMask ops_with_arg =
2998       SpvImageOperandsBiasMask |
2999       SpvImageOperandsLodMask |
3000       SpvImageOperandsGradMask |
3001       SpvImageOperandsConstOffsetMask |
3002       SpvImageOperandsOffsetMask |
3003       SpvImageOperandsConstOffsetsMask |
3004       SpvImageOperandsSampleMask |
3005       SpvImageOperandsMinLodMask |
3006       SpvImageOperandsMakeTexelAvailableMask |
3007       SpvImageOperandsMakeTexelVisibleMask;
3008 
3009    assert(util_bitcount(op) == 1);
3010    assert(w[mask_idx] & op);
3011    assert(op & ops_with_arg);
3012 
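   /* The argument for 'op' follows the mask word, after the arguments of all
    * lower-numbered operands that also take arguments.  For example, with
    * Bias | ConstOffset set and op == ConstOffset, the ConstOffset argument
    * is the second word after the mask.
    */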
3013    uint32_t idx = util_bitcount(w[mask_idx] & (op - 1) & ops_with_arg) + 1;
3014 
3015    /* Adjust indices for operands with two arguments. */
3016    static const SpvImageOperandsMask ops_with_two_args =
3017       SpvImageOperandsGradMask;
3018    idx += util_bitcount(w[mask_idx] & (op - 1) & ops_with_two_args);
3019 
3020    idx += mask_idx;
3021 
3022    vtn_fail_if(idx + (op & ops_with_two_args ? 1 : 0) >= count,
3023                "Image op claims to have %s but does not have enough "
3024                "following operands", spirv_imageoperands_to_string(op));
3025 
3026    return idx;
3027 }
3028 
3029 static void
3030 non_uniform_decoration_cb(struct vtn_builder *b,
3031                           struct vtn_value *val, int member,
3032                           const struct vtn_decoration *dec, void *void_ctx)
3033 {
3034    enum gl_access_qualifier *access = void_ctx;
3035    switch (dec->decoration) {
3036    case SpvDecorationNonUniformEXT:
3037       *access |= ACCESS_NON_UNIFORM;
3038       break;
3039 
3040    default:
3041       break;
3042    }
3043 }
3044 
3045 /* Apply SignExtend/ZeroExtend operands to get the actual result type for
3046  * image read/sample operations and source type for write operations.
3047  */
3048 static nir_alu_type
3049 get_image_type(struct vtn_builder *b, nir_alu_type type, unsigned operands)
3050 {
3051    unsigned extend_operands =
3052       operands & (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask);
3053    vtn_fail_if(nir_alu_type_get_base_type(type) == nir_type_float && extend_operands,
3054                "SignExtend/ZeroExtend used on floating-point texel type");
3055    vtn_fail_if(extend_operands ==
3056                (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask),
3057                "SignExtend and ZeroExtend both specified");
3058 
3059    if (operands & SpvImageOperandsSignExtendMask)
3060       return nir_type_int | nir_alu_type_get_type_size(type);
3061    if (operands & SpvImageOperandsZeroExtendMask)
3062       return nir_type_uint | nir_alu_type_get_type_size(type);
3063 
3064    return type;
3065 }
3066 
3067 static void
3068 vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
3069                    const uint32_t *w, unsigned count)
3070 {
3071    if (opcode == SpvOpSampledImage) {
3072       struct vtn_sampled_image si = {
3073          .image = vtn_get_image(b, w[3], NULL),
3074          .sampler = vtn_get_sampler(b, w[4]),
3075       };
3076 
3077       validate_image_type_for_sampled_image(
3078          b, si.image->type,
3079          "Type of Image operand of OpSampledImage");
3080 
3081       enum gl_access_qualifier access = 0;
3082       vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
3083                              non_uniform_decoration_cb, &access);
3084       vtn_foreach_decoration(b, vtn_untyped_value(b, w[4]),
3085                              non_uniform_decoration_cb, &access);
3086 
3087       vtn_push_sampled_image(b, w[2], si, access & ACCESS_NON_UNIFORM);
3088       return;
3089    } else if (opcode == SpvOpImage) {
3090       struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
3091 
3092       enum gl_access_qualifier access = 0;
3093       vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
3094                              non_uniform_decoration_cb, &access);
3095 
3096       vtn_push_image(b, w[2], si.image, access & ACCESS_NON_UNIFORM);
3097       return;
3098    } else if (opcode == SpvOpImageSparseTexelsResident) {
3099       nir_def *code = vtn_get_nir_ssa(b, w[3]);
3100       vtn_push_nir_ssa(b, w[2], nir_is_sparse_texels_resident(&b->nb, 1, code));
3101       return;
3102    }
3103 
3104    nir_deref_instr *image = NULL, *sampler = NULL;
3105    struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
3106    if (sampled_val->type->base_type == vtn_base_type_sampled_image) {
3107       struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
3108       image = si.image;
3109       sampler = si.sampler;
3110    } else {
3111       image = vtn_get_image(b, w[3], NULL);
3112    }
3113 
3114    const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image->type);
3115    const bool is_array = glsl_sampler_type_is_array(image->type);
3116    nir_alu_type dest_type = nir_type_invalid;
3117 
3118    /* Figure out the base texture operation */
3119    nir_texop texop;
3120    switch (opcode) {
3121    case SpvOpImageSampleImplicitLod:
3122    case SpvOpImageSparseSampleImplicitLod:
3123    case SpvOpImageSampleDrefImplicitLod:
3124    case SpvOpImageSparseSampleDrefImplicitLod:
3125       vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_BUF &&
3126                  sampler_dim != GLSL_SAMPLER_DIM_MS &&
3127                  sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS);
3128       texop = nir_texop_tex;
3129       break;
3130 
3131    case SpvOpImageSampleProjImplicitLod:
3132    case SpvOpImageSampleProjDrefImplicitLod:
3133       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3134                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
3135                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
3136                  sampler_dim == GLSL_SAMPLER_DIM_RECT);
3137       vtn_assert(!is_array);
3138       texop = nir_texop_tex;
3139       break;
3140 
3141    case SpvOpImageSampleExplicitLod:
3142    case SpvOpImageSparseSampleExplicitLod:
3143    case SpvOpImageSampleDrefExplicitLod:
3144    case SpvOpImageSparseSampleDrefExplicitLod:
3145       vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_BUF &&
3146                  sampler_dim != GLSL_SAMPLER_DIM_MS &&
3147                  sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS);
3148       texop = nir_texop_txl;
3149       break;
3150 
3151    case SpvOpImageSampleProjExplicitLod:
3152    case SpvOpImageSampleProjDrefExplicitLod:
3153       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3154                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
3155                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
3156                  sampler_dim == GLSL_SAMPLER_DIM_RECT);
3157       vtn_assert(!is_array);
3158       texop = nir_texop_txl;
3159       break;
3160 
3161    case SpvOpImageFetch:
3162    case SpvOpImageSparseFetch:
3163       vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_CUBE);
3164       if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
3165          texop = nir_texop_txf_ms;
3166       } else {
3167          texop = nir_texop_txf;
3168       }
3169       break;
3170 
3171    case SpvOpImageGather:
3172    case SpvOpImageSparseGather:
3173    case SpvOpImageDrefGather:
3174    case SpvOpImageSparseDrefGather:
3175       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_2D ||
3176                  sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
3177                  sampler_dim == GLSL_SAMPLER_DIM_RECT);
3178       texop = nir_texop_tg4;
3179       break;
3180 
3181    case SpvOpImageQuerySizeLod:
3182       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3183                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
3184                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
3185                  sampler_dim == GLSL_SAMPLER_DIM_CUBE);
3186       texop = nir_texop_txs;
3187       dest_type = nir_type_int32;
3188       break;
3189 
3190    case SpvOpImageQuerySize:
3191       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3192                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
3193                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
3194                  sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
3195                  sampler_dim == GLSL_SAMPLER_DIM_RECT ||
3196                  sampler_dim == GLSL_SAMPLER_DIM_MS ||
3197                  sampler_dim == GLSL_SAMPLER_DIM_BUF);
3198       texop = nir_texop_txs;
3199       dest_type = nir_type_int32;
3200       break;
3201 
3202    case SpvOpImageQueryLod:
3203       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3204                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
3205                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
3206                  sampler_dim == GLSL_SAMPLER_DIM_CUBE);
3207       texop = nir_texop_lod;
3208       dest_type = nir_type_float32;
3209       break;
3210 
3211    case SpvOpImageQueryLevels:
3212       /* This operation is not valid for an MS image but is present in some
3213        * old shaders.  Just return 1 in those cases.
3214        */
3215       if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
3216          vtn_warn("OpImageQueryLevels 'Sampled Image' should have an MS of 0, "
3217                   "but found MS of 1.  Replacing query with constant value 1.");
3218          vtn_push_nir_ssa(b, w[2], nir_imm_int(&b->nb, 1));
3219          return;
3220       }
3221       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3222                  sampler_dim == GLSL_SAMPLER_DIM_2D ||
3223                  sampler_dim == GLSL_SAMPLER_DIM_3D ||
3224                  sampler_dim == GLSL_SAMPLER_DIM_CUBE);
3225       texop = nir_texop_query_levels;
3226       dest_type = nir_type_int32;
3227       break;
3228 
3229    case SpvOpImageQuerySamples:
3230       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS);
3231       texop = nir_texop_texture_samples;
3232       dest_type = nir_type_int32;
3233       break;
3234 
3235    case SpvOpFragmentFetchAMD:
3236       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS ||
3237                  sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3238       texop = nir_texop_fragment_fetch_amd;
3239       break;
3240 
3241    case SpvOpFragmentMaskFetchAMD:
3242       vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS ||
3243                  sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3244       texop = nir_texop_fragment_mask_fetch_amd;
3245       dest_type = nir_type_uint32;
3246       break;
3247 
3248    default:
3249       vtn_fail_with_opcode("Unhandled opcode", opcode);
3250    }
3251 
3252    nir_tex_src srcs[10]; /* 10 should be enough */
3253    nir_tex_src *p = srcs;
3254 
3255    p->src = nir_src_for_ssa(&image->def);
3256    p->src_type = nir_tex_src_texture_deref;
3257    p++;
3258 
3259    switch (texop) {
3260    case nir_texop_tex:
3261    case nir_texop_txb:
3262    case nir_texop_txl:
3263    case nir_texop_txd:
3264    case nir_texop_tg4:
3265    case nir_texop_lod:
3266       vtn_fail_if(sampler == NULL,
3267                   "%s requires an image of type OpTypeSampledImage",
3268                   spirv_op_to_string(opcode));
3269       p->src = nir_src_for_ssa(&sampler->def);
3270       p->src_type = nir_tex_src_sampler_deref;
3271       p++;
3272       break;
3273    case nir_texop_txf:
3274    case nir_texop_txf_ms:
3275    case nir_texop_txs:
3276    case nir_texop_query_levels:
3277    case nir_texop_texture_samples:
3278    case nir_texop_samples_identical:
3279    case nir_texop_fragment_fetch_amd:
3280    case nir_texop_fragment_mask_fetch_amd:
3281       /* These don't take a sampler. */
3282       break;
3283    case nir_texop_txf_ms_fb:
3284       vtn_fail("unexpected nir_texop_txf_ms_fb");
3285       break;
3286    case nir_texop_txf_ms_mcs_intel:
3287       vtn_fail("unexpected nir_texop_txf_ms_mcs");
3288       break;
3289    case nir_texop_tex_prefetch:
3290       vtn_fail("unexpected nir_texop_tex_prefetch");
3291       break;
3292    case nir_texop_descriptor_amd:
3293    case nir_texop_sampler_descriptor_amd:
3294       vtn_fail("unexpected nir_texop_*descriptor_amd");
3295       break;
3296    case nir_texop_lod_bias_agx:
3297    case nir_texop_custom_border_color_agx:
3298    case nir_texop_has_custom_border_color_agx:
3299       vtn_fail("unexpected nir_texop_*_agx");
3300       break;
3301    case nir_texop_hdr_dim_nv:
3302    case nir_texop_tex_type_nv:
3303       vtn_fail("unexpected nir_texop_*_nv");
3304       break;
3305    }
3306 
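   /* Words 1-3 are the result type, result id, and (sampled) image, so any
    * remaining texture operands start at word 4.
    */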
3307    unsigned idx = 4;
3308 
3309    struct nir_def *coord;
3310    unsigned coord_components;
3311    switch (opcode) {
3312    case SpvOpImageSampleImplicitLod:
3313    case SpvOpImageSparseSampleImplicitLod:
3314    case SpvOpImageSampleExplicitLod:
3315    case SpvOpImageSparseSampleExplicitLod:
3316    case SpvOpImageSampleDrefImplicitLod:
3317    case SpvOpImageSparseSampleDrefImplicitLod:
3318    case SpvOpImageSampleDrefExplicitLod:
3319    case SpvOpImageSparseSampleDrefExplicitLod:
3320    case SpvOpImageSampleProjImplicitLod:
3321    case SpvOpImageSampleProjExplicitLod:
3322    case SpvOpImageSampleProjDrefImplicitLod:
3323    case SpvOpImageSampleProjDrefExplicitLod:
3324    case SpvOpImageFetch:
3325    case SpvOpImageSparseFetch:
3326    case SpvOpImageGather:
3327    case SpvOpImageSparseGather:
3328    case SpvOpImageDrefGather:
3329    case SpvOpImageSparseDrefGather:
3330    case SpvOpImageQueryLod:
3331    case SpvOpFragmentFetchAMD:
3332    case SpvOpFragmentMaskFetchAMD: {
3333       /* All these types have the coordinate as their first real argument */
3334       coord_components = glsl_get_sampler_dim_coordinate_components(sampler_dim);
3335 
3336       if (is_array && texop != nir_texop_lod)
3337          coord_components++;
3338 
3339       struct vtn_ssa_value *coord_val = vtn_ssa_value(b, w[idx++]);
3340       coord = coord_val->def;
3341       /* From the SPIR-V spec version 1.5, rev. 5:
3342        *
3343        *    "Coordinate must be a scalar or vector of floating-point type. It
3344        *    contains (u[, v] ... [, array layer]) as needed by the definition
3345        *    of Sampled Image. It may be a vector larger than needed, but all
3346        *    unused components appear after all used components."
3347        */
3348       vtn_fail_if(coord->num_components < coord_components,
3349                   "Coordinate value passed has fewer components than sampler dimensionality.");
3350       p->src = nir_src_for_ssa(nir_trim_vector(&b->nb, coord, coord_components));
3351 
3352       /* OpenCL allows integer sampling coordinates */
3353       if (glsl_type_is_integer(coord_val->type) &&
3354           opcode == SpvOpImageSampleExplicitLod) {
3355          vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
3356                      "Unless the Kernel capability is being used, the coordinate parameter "
3357                      "of OpImageSampleExplicitLod must be floating point.");
3358 
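         /* Convert the integer texel coordinates to float and add 0.5 so the
          * sample addresses the texel center; the array layer, if any, is
          * left unoffset.
          */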
3359          nir_def *coords[4];
3360          nir_def *f0_5 = nir_imm_float(&b->nb, 0.5);
3361          for (unsigned i = 0; i < coord_components; i++) {
3362             coords[i] = nir_i2f32(&b->nb, nir_channel(&b->nb, p->src.ssa, i));
3363 
3364             if (!is_array || i != coord_components - 1)
3365                coords[i] = nir_fadd(&b->nb, coords[i], f0_5);
3366          }
3367 
3368          p->src = nir_src_for_ssa(nir_vec(&b->nb, coords, coord_components));
3369       }
3370 
3371       p->src_type = nir_tex_src_coord;
3372       p++;
3373       break;
3374    }
3375 
3376    default:
3377       coord = NULL;
3378       coord_components = 0;
3379       break;
3380    }
3381 
3382    switch (opcode) {
3383    case SpvOpImageSampleProjImplicitLod:
3384    case SpvOpImageSampleProjExplicitLod:
3385    case SpvOpImageSampleProjDrefImplicitLod:
3386    case SpvOpImageSampleProjDrefExplicitLod:
3387       /* These have the projector as the last coordinate component */
3388       p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
3389       p->src_type = nir_tex_src_projector;
3390       p++;
3391       break;
3392 
3393    default:
3394       break;
3395    }
3396 
3397    bool is_shadow = false;
3398    unsigned gather_component = 0;
3399    switch (opcode) {
3400    case SpvOpImageSampleDrefImplicitLod:
3401    case SpvOpImageSparseSampleDrefImplicitLod:
3402    case SpvOpImageSampleDrefExplicitLod:
3403    case SpvOpImageSparseSampleDrefExplicitLod:
3404    case SpvOpImageSampleProjDrefImplicitLod:
3405    case SpvOpImageSampleProjDrefExplicitLod:
3406    case SpvOpImageDrefGather:
3407    case SpvOpImageSparseDrefGather:
3408       /* These all have an explicit depth value as their next source */
3409       is_shadow = true;
3410       (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
3411       break;
3412 
3413    case SpvOpImageGather:
3414    case SpvOpImageSparseGather:
3415       /* This has a component as its next source */
3416       gather_component = vtn_constant_uint(b, w[idx++]);
3417       break;
3418 
3419    default:
3420       break;
3421    }
3422 
3423    bool is_sparse = false;
3424    switch (opcode) {
3425    case SpvOpImageSparseSampleImplicitLod:
3426    case SpvOpImageSparseSampleExplicitLod:
3427    case SpvOpImageSparseSampleDrefImplicitLod:
3428    case SpvOpImageSparseSampleDrefExplicitLod:
3429    case SpvOpImageSparseFetch:
3430    case SpvOpImageSparseGather:
3431    case SpvOpImageSparseDrefGather:
3432       is_sparse = true;
3433       break;
3434    default:
3435       break;
3436    }
3437 
3438    /* For OpImageQuerySizeLod, we always have an LOD */
3439    if (opcode == SpvOpImageQuerySizeLod)
3440       (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
3441 
3442    /* For OpFragmentFetchAMD, we always have a multisample index */
3443    if (opcode == SpvOpFragmentFetchAMD)
3444       (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
3445 
3446    /* Now we need to handle some number of optional arguments */
3447    struct vtn_value *gather_offsets = NULL;
3448    uint32_t operands = SpvImageOperandsMaskNone;
3449    if (idx < count) {
3450       operands = w[idx];
3451 
3452       if (operands & SpvImageOperandsBiasMask) {
3453          vtn_assert(texop == nir_texop_tex ||
3454                     texop == nir_texop_tg4);
3455          if (texop == nir_texop_tex)
3456             texop = nir_texop_txb;
3457          uint32_t arg = image_operand_arg(b, w, count, idx,
3458                                           SpvImageOperandsBiasMask);
3459          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_bias);
3460       }
3461 
3462       if (operands & SpvImageOperandsLodMask) {
3463          vtn_assert(texop == nir_texop_txl || texop == nir_texop_txf ||
3464                     texop == nir_texop_txs || texop == nir_texop_tg4);
3465          uint32_t arg = image_operand_arg(b, w, count, idx,
3466                                           SpvImageOperandsLodMask);
3467          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_lod);
3468       }
3469 
3470       if (operands & SpvImageOperandsGradMask) {
3471          vtn_assert(texop == nir_texop_txl);
3472          texop = nir_texop_txd;
3473          uint32_t arg = image_operand_arg(b, w, count, idx,
3474                                           SpvImageOperandsGradMask);
3475          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ddx);
3476          (*p++) = vtn_tex_src(b, w[arg + 1], nir_tex_src_ddy);
3477       }
3478 
3479       vtn_fail_if(util_bitcount(operands & (SpvImageOperandsConstOffsetsMask |
3480                                             SpvImageOperandsOffsetMask |
3481                                             SpvImageOperandsConstOffsetMask)) > 1,
3482                   "At most one of the ConstOffset, Offset, and ConstOffsets "
3483                   "image operands can be used on a given instruction.");
3484 
3485       if (operands & SpvImageOperandsOffsetMask) {
3486          uint32_t arg = image_operand_arg(b, w, count, idx,
3487                                           SpvImageOperandsOffsetMask);
3488          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3489       }
3490 
3491       if (operands & SpvImageOperandsConstOffsetMask) {
3492          uint32_t arg = image_operand_arg(b, w, count, idx,
3493                                           SpvImageOperandsConstOffsetMask);
3494          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3495       }
3496 
3497       if (operands & SpvImageOperandsConstOffsetsMask) {
3498          vtn_assert(texop == nir_texop_tg4);
3499          uint32_t arg = image_operand_arg(b, w, count, idx,
3500                                           SpvImageOperandsConstOffsetsMask);
3501          gather_offsets = vtn_value(b, w[arg], vtn_value_type_constant);
3502       }
3503 
3504       if (operands & SpvImageOperandsSampleMask) {
3505          vtn_assert(texop == nir_texop_txf_ms);
3506          uint32_t arg = image_operand_arg(b, w, count, idx,
3507                                           SpvImageOperandsSampleMask);
3508          texop = nir_texop_txf_ms;
3509          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ms_index);
3510       }
3511 
3512       if (operands & SpvImageOperandsMinLodMask) {
3513          vtn_assert(texop == nir_texop_tex ||
3514                     texop == nir_texop_txb ||
3515                     texop == nir_texop_txd);
3516          uint32_t arg = image_operand_arg(b, w, count, idx,
3517                                           SpvImageOperandsMinLodMask);
3518          (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_min_lod);
3519       }
3520    }
3521 
3522    struct vtn_type *ret_type = vtn_get_type(b, w[1]);
3523    struct vtn_type *struct_type = NULL;
3524    if (is_sparse) {
3525       vtn_assert(glsl_type_is_struct_or_ifc(ret_type->type));
3526       struct_type = ret_type;
3527       ret_type = struct_type->members[1];
3528    }
3529 
3530    nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
3531    instr->op = texop;
3532 
3533    memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
3534 
3535    instr->coord_components = coord_components;
3536    instr->sampler_dim = sampler_dim;
3537    instr->is_array = is_array;
3538    instr->is_shadow = is_shadow;
3539    instr->is_sparse = is_sparse;
3540    instr->is_new_style_shadow =
3541       is_shadow && glsl_get_components(ret_type->type) == 1;
3542    instr->component = gather_component;
3543 
3544    /* If SpvCapabilityImageGatherBiasLodAMD is enabled, texture gather without an explicit LOD
3545     * has an implicit one (instead of using level 0).
3546     */
3547    if (texop == nir_texop_tg4 &&
3548        b->enabled_capabilities.ImageGatherBiasLodAMD &&
3549        !(operands & SpvImageOperandsLodMask)) {
3550       instr->is_gather_implicit_lod = true;
3551    }
3552 
3553    /* The Vulkan spec says:
3554     *
3555     *    "If an instruction loads from or stores to a resource (including
3556     *    atomics and image instructions) and the resource descriptor being
3557     *    accessed is not dynamically uniform, then the operand corresponding
3558     *    to that resource (e.g. the pointer or sampled image operand) must be
3559     *    decorated with NonUniform."
3560     *
3561     * It's very careful to specify that the exact operand must be decorated
3562     * NonUniform.  The SPIR-V parser is not expected to chase through long
3563     * chains to find the NonUniform decoration.  It's either right there or we
3564     * can assume it doesn't exist.
3565     */
3566    enum gl_access_qualifier access = 0;
3567    vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access);
3568 
3569    if (operands & SpvImageOperandsNontemporalMask)
3570       access |= ACCESS_NON_TEMPORAL;
3571 
3572    if (sampler && b->options->force_tex_non_uniform)
3573       access |= ACCESS_NON_UNIFORM;
3574 
3575    if (sampled_val->propagated_non_uniform)
3576       access |= ACCESS_NON_UNIFORM;
3577 
3578    if (image && (access & ACCESS_NON_UNIFORM))
3579       instr->texture_non_uniform = true;
3580 
3581    if (sampler && (access & ACCESS_NON_UNIFORM))
3582       instr->sampler_non_uniform = true;
3583 
3584    /* for non-query ops, get dest_type from SPIR-V return type */
3585    if (dest_type == nir_type_invalid) {
3586       /* The return type should match the image type, unless the image type is
3587        * VOID (CL image), in which case the return type determines dest_type.
3588        */
3589       enum glsl_base_type sampler_base =
3590          glsl_get_sampler_result_type(image->type);
3591       enum glsl_base_type ret_base = glsl_get_base_type(ret_type->type);
3592       vtn_fail_if(sampler_base != ret_base && sampler_base != GLSL_TYPE_VOID,
3593                   "SPIR-V return type mismatches image type. This is only valid "
3594                   "for untyped images (OpenCL).");
3595       dest_type = nir_get_nir_type_for_glsl_base_type(ret_base);
3596       dest_type = get_image_type(b, dest_type, operands);
3597    }
3598 
3599    instr->dest_type = dest_type;
3600 
3601    nir_def_init(&instr->instr, &instr->def,
3602                 nir_tex_instr_dest_size(instr), 32);
3603 
3604    vtn_assert(glsl_get_vector_elements(ret_type->type) ==
3605               nir_tex_instr_result_size(instr));
3606 
3607    if (gather_offsets) {
3608       vtn_fail_if(gather_offsets->type->base_type != vtn_base_type_array ||
3609                   gather_offsets->type->length != 4,
3610                   "ConstOffsets must be an array of size four of vectors "
3611                   "of two integer components");
3612 
3613       struct vtn_type *vec_type = gather_offsets->type->array_element;
3614       vtn_fail_if(vec_type->base_type != vtn_base_type_vector ||
3615                   vec_type->length != 2 ||
3616                   !glsl_type_is_integer(vec_type->type),
3617                   "ConstOffsets must be an array of size four of vectors "
3618                   "of two integer components");
3619 
3620       unsigned bit_size = glsl_get_bit_size(vec_type->type);
3621       for (uint32_t i = 0; i < 4; i++) {
3622          const nir_const_value *cvec =
3623             gather_offsets->constant->elements[i]->values;
3624          for (uint32_t j = 0; j < 2; j++) {
3625             switch (bit_size) {
3626             case 8:  instr->tg4_offsets[i][j] = cvec[j].i8;    break;
3627             case 16: instr->tg4_offsets[i][j] = cvec[j].i16;   break;
3628             case 32: instr->tg4_offsets[i][j] = cvec[j].i32;   break;
3629             case 64: instr->tg4_offsets[i][j] = cvec[j].i64;   break;
3630             default:
3631                vtn_fail("Unsupported bit size: %u", bit_size);
3632             }
3633          }
3634       }
3635    }
3636 
3637    nir_builder_instr_insert(&b->nb, &instr->instr);
3638 
3639    if (is_sparse) {
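      /* The NIR tex result packs the texel values first and the residency
       * code last; split that back into the SPIR-V sparse struct, whose
       * member 0 is the residency code and member 1 the texel value.
       */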
3640       struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3641       unsigned result_size = glsl_get_vector_elements(ret_type->type);
3642       dest->elems[0]->def = nir_channel(&b->nb, &instr->def, result_size);
3643       dest->elems[1]->def = nir_trim_vector(&b->nb, &instr->def,
3644                                               result_size);
3645       vtn_push_ssa_value(b, w[2], dest);
3646    } else {
3647       vtn_push_nir_ssa(b, w[2], &instr->def);
3648    }
3649 }
3650 
3651 static nir_atomic_op
3652 translate_atomic_op(SpvOp opcode)
3653 {
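   /* Note that IIncrement, IDecrement and ISub all map to iadd; the
    * corresponding +1, -1 or negated data source is supplied by
    * fill_common_atomic_sources().
    */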
3654    switch (opcode) {
3655    case SpvOpAtomicExchange:            return nir_atomic_op_xchg;
3656    case SpvOpAtomicCompareExchange:     return nir_atomic_op_cmpxchg;
3657    case SpvOpAtomicCompareExchangeWeak: return nir_atomic_op_cmpxchg;
3658    case SpvOpAtomicIIncrement:          return nir_atomic_op_iadd;
3659    case SpvOpAtomicIDecrement:          return nir_atomic_op_iadd;
3660    case SpvOpAtomicIAdd:                return nir_atomic_op_iadd;
3661    case SpvOpAtomicISub:                return nir_atomic_op_iadd;
3662    case SpvOpAtomicSMin:                return nir_atomic_op_imin;
3663    case SpvOpAtomicUMin:                return nir_atomic_op_umin;
3664    case SpvOpAtomicSMax:                return nir_atomic_op_imax;
3665    case SpvOpAtomicUMax:                return nir_atomic_op_umax;
3666    case SpvOpAtomicAnd:                 return nir_atomic_op_iand;
3667    case SpvOpAtomicOr:                  return nir_atomic_op_ior;
3668    case SpvOpAtomicXor:                 return nir_atomic_op_ixor;
3669    case SpvOpAtomicFAddEXT:             return nir_atomic_op_fadd;
3670    case SpvOpAtomicFMinEXT:             return nir_atomic_op_fmin;
3671    case SpvOpAtomicFMaxEXT:             return nir_atomic_op_fmax;
3672    case SpvOpAtomicFlagTestAndSet:      return nir_atomic_op_cmpxchg;
3673    default:
3674       unreachable("Invalid atomic");
3675    }
3676 }
3677 
3678 static void
3679 fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
3680                            const uint32_t *w, nir_src *src)
3681 {
3682    const struct glsl_type *type = vtn_get_type(b, w[1])->type;
3683    unsigned bit_size = glsl_get_bit_size(type);
3684 
3685    switch (opcode) {
3686    case SpvOpAtomicIIncrement:
3687       src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 1, bit_size));
3688       break;
3689 
3690    case SpvOpAtomicIDecrement:
3691       src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, bit_size));
3692       break;
3693 
3694    case SpvOpAtomicISub:
3695       src[0] =
3696          nir_src_for_ssa(nir_ineg(&b->nb, vtn_get_nir_ssa(b, w[6])));
3697       break;
3698 
3699    case SpvOpAtomicCompareExchange:
3700    case SpvOpAtomicCompareExchangeWeak:
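      /* SPIR-V passes Value (w[7]) before Comparator (w[8]); they are
       * reordered here to match the NIR atomic swap source order.
       */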
3701       src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[8]));
3702       src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[7]));
3703       break;
3704 
3705    case SpvOpAtomicExchange:
3706    case SpvOpAtomicIAdd:
3707    case SpvOpAtomicSMin:
3708    case SpvOpAtomicUMin:
3709    case SpvOpAtomicSMax:
3710    case SpvOpAtomicUMax:
3711    case SpvOpAtomicAnd:
3712    case SpvOpAtomicOr:
3713    case SpvOpAtomicXor:
3714    case SpvOpAtomicFAddEXT:
3715    case SpvOpAtomicFMinEXT:
3716    case SpvOpAtomicFMaxEXT:
3717       src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6]));
3718       break;
3719 
3720    default:
3721       vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3722    }
3723 }
3724 
3725 static nir_def *
3726 get_image_coord(struct vtn_builder *b, uint32_t value)
3727 {
3728    nir_def *coord = vtn_get_nir_ssa(b, value);
3729    /* The image_load_store intrinsics assume a 4-dim coordinate */
3730    return nir_pad_vec4(&b->nb, coord);
3731 }
3732 
3733 static void
3734 vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
3735                  const uint32_t *w, unsigned count)
3736 {
3737    /* Just get this one out of the way */
3738    if (opcode == SpvOpImageTexelPointer) {
3739       struct vtn_value *val =
3740          vtn_push_value(b, w[2], vtn_value_type_image_pointer);
3741       val->image = vtn_alloc(b, struct vtn_image_pointer);
3742 
3743       val->image->image = vtn_nir_deref(b, w[3]);
3744       val->image->coord = get_image_coord(b, w[4]);
3745       val->image->sample = vtn_get_nir_ssa(b, w[5]);
3746       val->image->lod = nir_imm_int(&b->nb, 0);
3747       return;
3748    }
3749 
3750    struct vtn_image_pointer image;
3751    SpvScope scope = SpvScopeInvocation;
3752    SpvMemorySemanticsMask semantics = 0;
3753    SpvImageOperandsMask operands = SpvImageOperandsMaskNone;
3754 
3755    enum gl_access_qualifier access = 0;
3756 
3757    struct vtn_value *res_val;
3758    switch (opcode) {
3759    case SpvOpAtomicExchange:
3760    case SpvOpAtomicCompareExchange:
3761    case SpvOpAtomicCompareExchangeWeak:
3762    case SpvOpAtomicIIncrement:
3763    case SpvOpAtomicIDecrement:
3764    case SpvOpAtomicIAdd:
3765    case SpvOpAtomicISub:
3766    case SpvOpAtomicLoad:
3767    case SpvOpAtomicSMin:
3768    case SpvOpAtomicUMin:
3769    case SpvOpAtomicSMax:
3770    case SpvOpAtomicUMax:
3771    case SpvOpAtomicAnd:
3772    case SpvOpAtomicOr:
3773    case SpvOpAtomicXor:
3774    case SpvOpAtomicFAddEXT:
3775    case SpvOpAtomicFMinEXT:
3776    case SpvOpAtomicFMaxEXT:
3777       res_val = vtn_value(b, w[3], vtn_value_type_image_pointer);
3778       image = *res_val->image;
3779       scope = vtn_constant_uint(b, w[4]);
3780       semantics = vtn_constant_uint(b, w[5]);
3781       access |= ACCESS_COHERENT;
3782       break;
3783 
3784    case SpvOpAtomicStore:
3785       res_val = vtn_value(b, w[1], vtn_value_type_image_pointer);
3786       image = *res_val->image;
3787       scope = vtn_constant_uint(b, w[2]);
3788       semantics = vtn_constant_uint(b, w[3]);
3789       access |= ACCESS_COHERENT;
3790       break;
3791 
3792    case SpvOpImageQuerySizeLod:
3793       res_val = vtn_untyped_value(b, w[3]);
3794       image.image = vtn_get_image(b, w[3], &access);
3795       image.coord = NULL;
3796       image.sample = NULL;
3797       image.lod = vtn_ssa_value(b, w[4])->def;
3798       break;
3799 
3800    case SpvOpImageQueryFormat:
3801    case SpvOpImageQueryLevels:
3802    case SpvOpImageQueryOrder:
3803    case SpvOpImageQuerySamples:
3804    case SpvOpImageQuerySize:
3805       res_val = vtn_untyped_value(b, w[3]);
3806       image.image = vtn_get_image(b, w[3], &access);
3807       image.coord = NULL;
3808       image.sample = NULL;
3809       image.lod = NULL;
3810       break;
3811 
3812    case SpvOpImageRead:
3813    case SpvOpImageSparseRead: {
3814       res_val = vtn_untyped_value(b, w[3]);
3815       image.image = vtn_get_image(b, w[3], &access);
3816       image.coord = get_image_coord(b, w[4]);
3817 
3818       operands = count > 5 ? w[5] : SpvImageOperandsMaskNone;
3819 
3820       if (operands & SpvImageOperandsSampleMask) {
3821          uint32_t arg = image_operand_arg(b, w, count, 5,
3822                                           SpvImageOperandsSampleMask);
3823          image.sample = vtn_get_nir_ssa(b, w[arg]);
3824       } else {
3825          image.sample = nir_undef(&b->nb, 1, 32);
3826       }
3827 
3828       if (operands & SpvImageOperandsMakeTexelVisibleMask) {
3829          vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3830                      "MakeTexelVisible requires NonPrivateTexel to also be set.");
3831          uint32_t arg = image_operand_arg(b, w, count, 5,
3832                                           SpvImageOperandsMakeTexelVisibleMask);
3833          semantics = SpvMemorySemanticsMakeVisibleMask;
3834          scope = vtn_constant_uint(b, w[arg]);
3835       }
3836 
3837       if (operands & SpvImageOperandsLodMask) {
3838          uint32_t arg = image_operand_arg(b, w, count, 5,
3839                                           SpvImageOperandsLodMask);
3840          image.lod = vtn_get_nir_ssa(b, w[arg]);
3841       } else {
3842          image.lod = nir_imm_int(&b->nb, 0);
3843       }
3844 
3845       if (operands & SpvImageOperandsVolatileTexelMask)
3846          access |= ACCESS_VOLATILE;
3847       if (operands & SpvImageOperandsNontemporalMask)
3848          access |= ACCESS_NON_TEMPORAL;
3849 
3850       break;
3851    }
3852 
3853    case SpvOpImageWrite: {
3854       res_val = vtn_untyped_value(b, w[1]);
3855       image.image = vtn_get_image(b, w[1], &access);
3856       image.coord = get_image_coord(b, w[2]);
3857 
3858       /* texel = w[3] */
3859 
3860       operands = count > 4 ? w[4] : SpvImageOperandsMaskNone;
3861 
3862       if (operands & SpvImageOperandsSampleMask) {
3863          uint32_t arg = image_operand_arg(b, w, count, 4,
3864                                           SpvImageOperandsSampleMask);
3865          image.sample = vtn_get_nir_ssa(b, w[arg]);
3866       } else {
3867          image.sample = nir_undef(&b->nb, 1, 32);
3868       }
3869 
3870       if (operands & SpvImageOperandsMakeTexelAvailableMask) {
3871          vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3872                      "MakeTexelAvailable requires NonPrivateTexel to also be set.");
3873          uint32_t arg = image_operand_arg(b, w, count, 4,
3874                                           SpvImageOperandsMakeTexelAvailableMask);
3875          semantics = SpvMemorySemanticsMakeAvailableMask;
3876          scope = vtn_constant_uint(b, w[arg]);
3877       }
3878 
3879       if (operands & SpvImageOperandsLodMask) {
3880          uint32_t arg = image_operand_arg(b, w, count, 4,
3881                                           SpvImageOperandsLodMask);
3882          image.lod = vtn_get_nir_ssa(b, w[arg]);
3883       } else {
3884          image.lod = nir_imm_int(&b->nb, 0);
3885       }
3886 
3887       if (operands & SpvImageOperandsVolatileTexelMask)
3888          access |= ACCESS_VOLATILE;
3889       if (operands & SpvImageOperandsNontemporalMask)
3890          access |= ACCESS_NON_TEMPORAL;
3891 
3892       break;
3893    }
3894 
3895    default:
3896       vtn_fail_with_opcode("Invalid image opcode", opcode);
3897    }
3898 
3899    if (semantics & SpvMemorySemanticsVolatileMask)
3900       access |= ACCESS_VOLATILE;
3901 
3902    nir_intrinsic_op op;
3903    switch (opcode) {
3904 #define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_deref_##N; break;
3905    OP(ImageQuerySize,            size)
3906    OP(ImageQuerySizeLod,         size)
3907    OP(ImageRead,                 load)
3908    OP(ImageSparseRead,           sparse_load)
3909    OP(ImageWrite,                store)
3910    OP(AtomicLoad,                load)
3911    OP(AtomicStore,               store)
3912    OP(AtomicExchange,            atomic)
3913    OP(AtomicCompareExchange,     atomic_swap)
3914    OP(AtomicCompareExchangeWeak, atomic_swap)
3915    OP(AtomicIIncrement,          atomic)
3916    OP(AtomicIDecrement,          atomic)
3917    OP(AtomicIAdd,                atomic)
3918    OP(AtomicISub,                atomic)
3919    OP(AtomicSMin,                atomic)
3920    OP(AtomicUMin,                atomic)
3921    OP(AtomicSMax,                atomic)
3922    OP(AtomicUMax,                atomic)
3923    OP(AtomicAnd,                 atomic)
3924    OP(AtomicOr,                  atomic)
3925    OP(AtomicXor,                 atomic)
3926    OP(AtomicFAddEXT,             atomic)
3927    OP(AtomicFMinEXT,             atomic)
3928    OP(AtomicFMaxEXT,             atomic)
3929    OP(ImageQueryFormat,          format)
3930    OP(ImageQueryLevels,          levels)
3931    OP(ImageQueryOrder,           order)
3932    OP(ImageQuerySamples,         samples)
3933 #undef OP
3934    default:
3935       vtn_fail_with_opcode("Invalid image opcode", opcode);
3936    }
3937 
3938    nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
3939    if (nir_intrinsic_has_atomic_op(intrin))
3940       nir_intrinsic_set_atomic_op(intrin, translate_atomic_op(opcode));
3941 
3942    intrin->src[0] = nir_src_for_ssa(&image.image->def);
3943    nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(image.image->type));
3944    nir_intrinsic_set_image_array(intrin,
3945       glsl_sampler_type_is_array(image.image->type));
3946 
3947    switch (opcode) {
3948    case SpvOpImageQueryLevels:
3949    case SpvOpImageQuerySamples:
3950    case SpvOpImageQuerySize:
3951    case SpvOpImageQuerySizeLod:
3952    case SpvOpImageQueryFormat:
3953    case SpvOpImageQueryOrder:
3954       break;
3955    default:
3956       /* The image coordinate is always 4 components but we may not have that
3957        * many.  Swizzle to compensate.
3958        */
3959       intrin->src[1] = nir_src_for_ssa(nir_pad_vec4(&b->nb, image.coord));
3960       intrin->src[2] = nir_src_for_ssa(image.sample);
3961       break;
3962    }
3963 
3964    /* The Vulkan spec says:
3965     *
3966     *    "If an instruction loads from or stores to a resource (including
3967     *    atomics and image instructions) and the resource descriptor being
3968     *    accessed is not dynamically uniform, then the operand corresponding
3969     *    to that resource (e.g. the pointer or sampled image operand) must be
3970     *    decorated with NonUniform."
3971     *
3972     * It's very careful to specify that the exact operand must be decorated
3973     * NonUniform.  The SPIR-V parser is not expected to chase through long
3974     * chains to find the NonUniform decoration.  It's either right there or we
3975     * can assume it doesn't exist.
3976     */
3977    vtn_foreach_decoration(b, res_val, non_uniform_decoration_cb, &access);
3978    nir_intrinsic_set_access(intrin, access);
3979 
3980    switch (opcode) {
3981    case SpvOpImageQueryLevels:
3982    case SpvOpImageQuerySamples:
3983    case SpvOpImageQueryFormat:
3984    case SpvOpImageQueryOrder:
3985       /* No additional sources */
3986       break;
3987    case SpvOpImageQuerySize:
3988       intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0));
3989       break;
3990    case SpvOpImageQuerySizeLod:
3991       intrin->src[1] = nir_src_for_ssa(image.lod);
3992       break;
3993    case SpvOpAtomicLoad:
3994    case SpvOpImageRead:
3995    case SpvOpImageSparseRead:
3996       /* Only OpImageRead can support a lod parameter if
3997        * SPV_AMD_shader_image_load_store_lod is used, but the current NIR
3998        * intrinsics definition for atomics requires us to set it for
3999        * OpAtomicLoad.
4000        */
4001       intrin->src[3] = nir_src_for_ssa(image.lod);
4002       break;
4003    case SpvOpAtomicStore:
4004    case SpvOpImageWrite: {
4005       const uint32_t value_id = opcode == SpvOpAtomicStore ? w[4] : w[3];
4006       struct vtn_ssa_value *value = vtn_ssa_value(b, value_id);
4007       /* nir_intrinsic_image_deref_store always takes a vec4 value */
4008       assert(op == nir_intrinsic_image_deref_store);
4009       intrin->num_components = 4;
4010       intrin->src[3] = nir_src_for_ssa(nir_pad_vec4(&b->nb, value->def));
4011       /* Only OpImageWrite can support a lod parameter if
4012        * SPV_AMD_shader_image_load_store_lod is used but the current NIR
4013        * intrinsics definition for atomics requires us to set it for
4014        * OpAtomicStore.
4015        */
4016       intrin->src[4] = nir_src_for_ssa(image.lod);
4017 
4018       nir_alu_type src_type =
4019          get_image_type(b, nir_get_nir_type_for_glsl_type(value->type), operands);
4020       nir_intrinsic_set_src_type(intrin, src_type);
4021       break;
4022    }
4023 
4024    case SpvOpAtomicCompareExchange:
4025    case SpvOpAtomicCompareExchangeWeak:
4026    case SpvOpAtomicIIncrement:
4027    case SpvOpAtomicIDecrement:
4028    case SpvOpAtomicExchange:
4029    case SpvOpAtomicIAdd:
4030    case SpvOpAtomicISub:
4031    case SpvOpAtomicSMin:
4032    case SpvOpAtomicUMin:
4033    case SpvOpAtomicSMax:
4034    case SpvOpAtomicUMax:
4035    case SpvOpAtomicAnd:
4036    case SpvOpAtomicOr:
4037    case SpvOpAtomicXor:
4038    case SpvOpAtomicFAddEXT:
4039    case SpvOpAtomicFMinEXT:
4040    case SpvOpAtomicFMaxEXT:
4041       fill_common_atomic_sources(b, opcode, w, &intrin->src[3]);
4042       break;
4043 
4044    default:
4045       vtn_fail_with_opcode("Invalid image opcode", opcode);
4046    }
4047 
4048    /* Image operations implicitly have the Image storage memory semantics. */
4049    semantics |= SpvMemorySemanticsImageMemoryMask;
4050 
4051    SpvMemorySemanticsMask before_semantics;
4052    SpvMemorySemanticsMask after_semantics;
4053    vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
4054 
4055    if (before_semantics)
4056       vtn_emit_memory_barrier(b, scope, before_semantics);
4057 
4058    if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) {
4059       struct vtn_type *type = vtn_get_type(b, w[1]);
4060       struct vtn_type *struct_type = NULL;
4061       if (opcode == SpvOpImageSparseRead) {
4062          vtn_assert(glsl_type_is_struct_or_ifc(type->type));
4063          struct_type = type;
4064          type = struct_type->members[1];
4065       }
4066 
4067       unsigned dest_components = glsl_get_vector_elements(type->type);
4068       if (opcode == SpvOpImageSparseRead)
4069          dest_components++;
4070 
4071       if (nir_intrinsic_infos[op].dest_components == 0)
4072          intrin->num_components = dest_components;
4073 
4074       unsigned bit_size = glsl_get_bit_size(type->type);
4075       if (opcode == SpvOpImageQuerySize ||
4076           opcode == SpvOpImageQuerySizeLod)
4077          bit_size = MIN2(bit_size, 32);
4078 
4079       nir_def_init(&intrin->instr, &intrin->def,
4080                    nir_intrinsic_dest_components(intrin), bit_size);
4081 
4082       nir_builder_instr_insert(&b->nb, &intrin->instr);
4083 
4084       nir_def *result = nir_trim_vector(&b->nb, &intrin->def,
4085                                               dest_components);
4086 
4087       if (opcode == SpvOpImageQuerySize ||
4088           opcode == SpvOpImageQuerySizeLod)
4089          result = nir_u2uN(&b->nb, result, glsl_get_bit_size(type->type));
4090 
4091       if (opcode == SpvOpImageSparseRead) {
4092          struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
4093          unsigned res_type_size = glsl_get_vector_elements(type->type);
4094          dest->elems[0]->def = nir_channel(&b->nb, result, res_type_size);
4095          if (intrin->def.bit_size != 32)
4096             dest->elems[0]->def = nir_u2u32(&b->nb, dest->elems[0]->def);
4097          dest->elems[1]->def = nir_trim_vector(&b->nb, result, res_type_size);
4098          vtn_push_ssa_value(b, w[2], dest);
4099       } else {
4100          vtn_push_nir_ssa(b, w[2], result);
4101       }
4102 
4103       if (opcode == SpvOpImageRead || opcode == SpvOpImageSparseRead ||
4104           opcode == SpvOpAtomicLoad) {
4105          nir_alu_type dest_type =
4106             get_image_type(b, nir_get_nir_type_for_glsl_type(type->type), operands);
4107          nir_intrinsic_set_dest_type(intrin, dest_type);
4108       }
4109    } else {
4110       nir_builder_instr_insert(&b->nb, &intrin->instr);
4111    }
4112 
4113    if (after_semantics)
4114       vtn_emit_memory_barrier(b, scope, after_semantics);
4115 }
4116 
4117 static nir_intrinsic_op
4118 get_uniform_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
4119 {
4120    switch (opcode) {
4121 #define OP(S, N) case SpvOp##S: return nir_intrinsic_atomic_counter_ ##N;
4122    OP(AtomicLoad,                read_deref)
4123    OP(AtomicExchange,            exchange)
4124    OP(AtomicCompareExchange,     comp_swap)
4125    OP(AtomicCompareExchangeWeak, comp_swap)
4126    OP(AtomicIIncrement,          inc_deref)
4127    OP(AtomicIDecrement,          post_dec_deref)
4128    OP(AtomicIAdd,                add_deref)
4129    OP(AtomicISub,                add_deref)
4130    OP(AtomicUMin,                min_deref)
4131    OP(AtomicUMax,                max_deref)
4132    OP(AtomicAnd,                 and_deref)
4133    OP(AtomicOr,                  or_deref)
4134    OP(AtomicXor,                 xor_deref)
4135 #undef OP
4136    default:
4137       /* We left the following out: AtomicStore, AtomicSMin and
4138        * AtomicSMax. Right now there are no NIR intrinsics for them. At the
4139        * moment, atomic counter support is only needed for ARB_spirv, so we
4140        * only need to support GLSL atomic counters, which are uints and don't
4141        * allow direct storage.
4142        */
4143       vtn_fail("Invalid uniform atomic");
4144    }
4145 }
4146 
4147 static nir_intrinsic_op
4148 get_deref_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
4149 {
4150    switch (opcode) {
4151    case SpvOpAtomicLoad:         return nir_intrinsic_load_deref;
4152    case SpvOpAtomicFlagClear:
4153    case SpvOpAtomicStore:        return nir_intrinsic_store_deref;
4154 #define OP(S, N) case SpvOp##S: return nir_intrinsic_deref_##N;
4155    OP(AtomicExchange,            atomic)
4156    OP(AtomicCompareExchange,     atomic_swap)
4157    OP(AtomicCompareExchangeWeak, atomic_swap)
4158    OP(AtomicIIncrement,          atomic)
4159    OP(AtomicIDecrement,          atomic)
4160    OP(AtomicIAdd,                atomic)
4161    OP(AtomicISub,                atomic)
4162    OP(AtomicSMin,                atomic)
4163    OP(AtomicUMin,                atomic)
4164    OP(AtomicSMax,                atomic)
4165    OP(AtomicUMax,                atomic)
4166    OP(AtomicAnd,                 atomic)
4167    OP(AtomicOr,                  atomic)
4168    OP(AtomicXor,                 atomic)
4169    OP(AtomicFAddEXT,             atomic)
4170    OP(AtomicFMinEXT,             atomic)
4171    OP(AtomicFMaxEXT,             atomic)
4172    OP(AtomicFlagTestAndSet,      atomic_swap)
4173 #undef OP
4174    default:
4175       vtn_fail_with_opcode("Invalid shared atomic", opcode);
4176    }
4177 }
4178 
4179 /*
4180  * Handles shared atomics, ssbo atomics and atomic counters.
4181  */
4182 static void
4183 vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
4184                    const uint32_t *w, UNUSED unsigned count)
4185 {
4186    struct vtn_pointer *ptr;
4187    nir_intrinsic_instr *atomic;
4188 
4189    SpvScope scope = SpvScopeInvocation;
4190    SpvMemorySemanticsMask semantics = 0;
4191    enum gl_access_qualifier access = 0;
4192 
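        /* Atomics that produce a result take Pointer/Scope/Semantics at
         * w[3..5]; OpAtomicStore and OpAtomicFlagClear have no result, so for
         * them those operands start at w[1].
         */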
4193    switch (opcode) {
4194    case SpvOpAtomicLoad:
4195    case SpvOpAtomicExchange:
4196    case SpvOpAtomicCompareExchange:
4197    case SpvOpAtomicCompareExchangeWeak:
4198    case SpvOpAtomicIIncrement:
4199    case SpvOpAtomicIDecrement:
4200    case SpvOpAtomicIAdd:
4201    case SpvOpAtomicISub:
4202    case SpvOpAtomicSMin:
4203    case SpvOpAtomicUMin:
4204    case SpvOpAtomicSMax:
4205    case SpvOpAtomicUMax:
4206    case SpvOpAtomicAnd:
4207    case SpvOpAtomicOr:
4208    case SpvOpAtomicXor:
4209    case SpvOpAtomicFAddEXT:
4210    case SpvOpAtomicFMinEXT:
4211    case SpvOpAtomicFMaxEXT:
4212    case SpvOpAtomicFlagTestAndSet:
4213       ptr = vtn_pointer(b, w[3]);
4214       scope = vtn_constant_uint(b, w[4]);
4215       semantics = vtn_constant_uint(b, w[5]);
4216       break;
4217    case SpvOpAtomicFlagClear:
4218    case SpvOpAtomicStore:
4219       ptr = vtn_pointer(b, w[1]);
4220       scope = vtn_constant_uint(b, w[2]);
4221       semantics = vtn_constant_uint(b, w[3]);
4222       break;
4223 
4224    default:
4225       vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
4226    }
4227 
4228    if (semantics & SpvMemorySemanticsVolatileMask)
4229       access |= ACCESS_VOLATILE;
4230 
4231    /* Here "uniform" means an atomic counter uniform. */
4232    if (ptr->mode == vtn_variable_mode_atomic_counter) {
4233       nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
4234       nir_intrinsic_op op = get_uniform_nir_atomic_op(b, opcode);
4235       atomic = nir_intrinsic_instr_create(b->nb.shader, op);
4236       atomic->src[0] = nir_src_for_ssa(&deref->def);
4237 
4238       /* For SSBOs we would need to initialize index/offset sources here. In
4239        * this case we don't need to, as that info is already stored on the
4240        * ptr->var->var nir_variable (see vtn_create_variable).
4241        */
4242 
4243       switch (opcode) {
4244       case SpvOpAtomicLoad:
4245       case SpvOpAtomicExchange:
4246       case SpvOpAtomicCompareExchange:
4247       case SpvOpAtomicCompareExchangeWeak:
4248       case SpvOpAtomicIIncrement:
4249       case SpvOpAtomicIDecrement:
4250       case SpvOpAtomicIAdd:
4251       case SpvOpAtomicISub:
4252       case SpvOpAtomicSMin:
4253       case SpvOpAtomicUMin:
4254       case SpvOpAtomicSMax:
4255       case SpvOpAtomicUMax:
4256       case SpvOpAtomicAnd:
4257       case SpvOpAtomicOr:
4258       case SpvOpAtomicXor:
4259          /* Nothing: we don't need to call fill_common_atomic_sources here, as
4260           * atomic counter uniforms don't have sources.
4261           */
4262          break;
4263 
4264       default:
4265          unreachable("Invalid SPIR-V atomic");
4266 
4267       }
4268    } else {
4269       nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
4270       const struct glsl_type *deref_type = deref->type;
4271       nir_intrinsic_op op = get_deref_nir_atomic_op(b, opcode);
4272       atomic = nir_intrinsic_instr_create(b->nb.shader, op);
4273       atomic->src[0] = nir_src_for_ssa(&deref->def);
4274 
4275       if (nir_intrinsic_has_atomic_op(atomic))
4276          nir_intrinsic_set_atomic_op(atomic, translate_atomic_op(opcode));
4277 
4278       if (ptr->mode != vtn_variable_mode_workgroup)
4279          access |= ACCESS_COHERENT;
4280 
4281       nir_intrinsic_set_access(atomic, access);
4282 
4283       switch (opcode) {
4284       case SpvOpAtomicLoad:
4285          atomic->num_components = glsl_get_vector_elements(deref_type);
4286          break;
4287 
4288       case SpvOpAtomicStore:
4289          atomic->num_components = glsl_get_vector_elements(deref_type);
4290          nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
4291          atomic->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4]));
4292          break;
4293 
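           /* The SPIR-V atomic flag is backed by a 32-bit integer here:
            * OpAtomicFlagClear simply stores 0, while OpAtomicFlagTestAndSet
            * becomes a compare-swap of 0 -> ~0 whose old value (converted to
            * bool below) tells us whether the flag was already set.
            */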
4294       case SpvOpAtomicFlagClear:
4295          atomic->num_components = 1;
4296          nir_intrinsic_set_write_mask(atomic, 1);
4297          atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
4298          break;
4299       case SpvOpAtomicFlagTestAndSet:
4300          atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
4301          atomic->src[2] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, 32));
4302          break;
4303       case SpvOpAtomicExchange:
4304       case SpvOpAtomicCompareExchange:
4305       case SpvOpAtomicCompareExchangeWeak:
4306       case SpvOpAtomicIIncrement:
4307       case SpvOpAtomicIDecrement:
4308       case SpvOpAtomicIAdd:
4309       case SpvOpAtomicISub:
4310       case SpvOpAtomicSMin:
4311       case SpvOpAtomicUMin:
4312       case SpvOpAtomicSMax:
4313       case SpvOpAtomicUMax:
4314       case SpvOpAtomicAnd:
4315       case SpvOpAtomicOr:
4316       case SpvOpAtomicXor:
4317       case SpvOpAtomicFAddEXT:
4318       case SpvOpAtomicFMinEXT:
4319       case SpvOpAtomicFMaxEXT:
4320          fill_common_atomic_sources(b, opcode, w, &atomic->src[1]);
4321          break;
4322 
4323       default:
4324          vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
4325       }
4326    }
4327 
4328    /* Atomic ordering operations will implicitly apply to the atomic operation
4329     * storage class, so include that too.
4330     */
4331    semantics |= vtn_mode_to_memory_semantics(ptr->mode);
4332 
4333    SpvMemorySemanticsMask before_semantics;
4334    SpvMemorySemanticsMask after_semantics;
4335    vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
4336 
4337    if (before_semantics)
4338       vtn_emit_memory_barrier(b, scope, before_semantics);
4339 
4340    if (opcode != SpvOpAtomicStore && opcode != SpvOpAtomicFlagClear) {
4341       struct vtn_type *type = vtn_get_type(b, w[1]);
4342 
4343       if (opcode == SpvOpAtomicFlagTestAndSet) {
4344          /* Map the atomic flag to a 32-bit atomic integer. */
4345          nir_def_init(&atomic->instr, &atomic->def, 1, 32);
4346       } else {
4347          nir_def_init(&atomic->instr, &atomic->def,
4348                       glsl_get_vector_elements(type->type),
4349                       glsl_get_bit_size(type->type));
4350 
4351          vtn_push_nir_ssa(b, w[2], &atomic->def);
4352       }
4353    }
4354 
4355    nir_builder_instr_insert(&b->nb, &atomic->instr);
4356 
4357    if (opcode == SpvOpAtomicFlagTestAndSet) {
4358       vtn_push_nir_ssa(b, w[2], nir_i2b(&b->nb, &atomic->def));
4359    }
4360    if (after_semantics)
4361       vtn_emit_memory_barrier(b, scope, after_semantics);
4362 }
4363 
4364 static nir_alu_instr *
4365 create_vec(struct vtn_builder *b, unsigned num_components, unsigned bit_size)
4366 {
4367    nir_op op = nir_op_vec(num_components);
4368    nir_alu_instr *vec = nir_alu_instr_create(b->shader, op);
4369    nir_def_init(&vec->instr, &vec->def, num_components, bit_size);
4370 
4371    return vec;
4372 }
4373 
4374 struct vtn_ssa_value *
4375 vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
4376 {
4377    if (src->transposed)
4378       return src->transposed;
4379 
4380    struct vtn_ssa_value *dest =
4381       vtn_create_ssa_value(b, glsl_transposed_type(src->type));
4382 
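        /* Column i of the result gathers element i of every source column; a
         * plain vector source just has its components scattered into the
         * result columns.
         */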
4383    for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
4384       if (glsl_type_is_vector_or_scalar(src->type)) {
4385          dest->elems[i]->def = nir_channel(&b->nb, src->def, i);
4386       } else {
4387          unsigned cols = glsl_get_matrix_columns(src->type);
4388          nir_scalar srcs[NIR_MAX_MATRIX_COLUMNS];
4389          for (unsigned j = 0; j < cols; j++) {
4390             srcs[j] = nir_get_scalar(src->elems[j]->def, i);
4391          }
4392          dest->elems[i]->def = nir_vec_scalars(&b->nb, srcs, cols);
4393       }
4394    }
4395 
4396    dest->transposed = src;
4397 
4398    return dest;
4399 }
4400 
4401 static nir_def *
4402 vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
4403                    nir_def *src0, nir_def *src1,
4404                    const uint32_t *indices)
4405 {
4406    nir_alu_instr *vec = create_vec(b, num_components, src0->bit_size);
4407 
4408    for (unsigned i = 0; i < num_components; i++) {
4409       uint32_t index = indices[i];
4410       unsigned total_components = src0->num_components + src1->num_components;
4411       vtn_fail_if(index != 0xffffffff && index >= total_components,
4412                   "OpVectorShuffle: All Component literals must either be "
4413                   "FFFFFFFF or in [0, N - 1] (inclusive)");
4414 
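           /* Per the SPIR-V spec, a Component literal of 0xFFFFFFFF means the
            * result component has no source, so its value is undefined.
            */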
4415       if (index == 0xffffffff) {
4416          vec->src[i].src =
4417             nir_src_for_ssa(nir_undef(&b->nb, 1, src0->bit_size));
4418       } else if (index < src0->num_components) {
4419          vec->src[i].src = nir_src_for_ssa(src0);
4420          vec->src[i].swizzle[0] = index;
4421       } else {
4422          vec->src[i].src = nir_src_for_ssa(src1);
4423          vec->src[i].swizzle[0] = index - src0->num_components;
4424       }
4425    }
4426 
4427    nir_builder_instr_insert(&b->nb, &vec->instr);
4428 
4429    return &vec->def;
4430 }
4431 
4432 /*
4433  * Concatenates a number of vectors/scalars together to produce a vector.
4434  */
4435 static nir_def *
4436 vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
4437                      unsigned num_srcs, nir_def **srcs)
4438 {
4439    nir_alu_instr *vec = create_vec(b, num_components, srcs[0]->bit_size);
4440 
4441    /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
4442     *
4443     *    "When constructing a vector, there must be at least two Constituent
4444     *    operands."
4445     */
4446    vtn_assert(num_srcs >= 2);
4447 
4448    unsigned dest_idx = 0;
4449    for (unsigned i = 0; i < num_srcs; i++) {
4450       nir_def *src = srcs[i];
4451       vtn_assert(dest_idx + src->num_components <= num_components);
4452       for (unsigned j = 0; j < src->num_components; j++) {
4453          vec->src[dest_idx].src = nir_src_for_ssa(src);
4454          vec->src[dest_idx].swizzle[0] = j;
4455          dest_idx++;
4456       }
4457    }
4458 
4459    /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
4460     *
4461     *    "When constructing a vector, the total number of components in all
4462     *    the operands must equal the number of components in Result Type."
4463     */
4464    vtn_assert(dest_idx == num_components);
4465 
4466    nir_builder_instr_insert(&b->nb, &vec->instr);
4467 
4468    return &vec->def;
4469 }
4470 
4471 static struct vtn_ssa_value *
4472 vtn_composite_copy(struct vtn_builder *b, struct vtn_ssa_value *src)
4473 {
4474    assert(!src->is_variable);
4475 
4476    struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
4477    dest->type = src->type;
4478 
4479    if (glsl_type_is_vector_or_scalar(src->type)) {
4480       dest->def = src->def;
4481    } else {
4482       unsigned elems = glsl_get_length(src->type);
4483 
4484       dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
4485       for (unsigned i = 0; i < elems; i++)
4486          dest->elems[i] = vtn_composite_copy(b, src->elems[i]);
4487    }
4488 
4489    return dest;
4490 }
4491 
4492 static struct vtn_ssa_value *
4493 vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
4494                      struct vtn_ssa_value *insert, const uint32_t *indices,
4495                      unsigned num_indices)
4496 {
4497    if (glsl_type_is_cmat(src->type))
4498       return vtn_cooperative_matrix_insert(b, src, insert, indices, num_indices);
4499 
4500    struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
4501 
4502    struct vtn_ssa_value *cur = dest;
4503    unsigned i;
4504    for (i = 0; i < num_indices - 1; i++) {
4505       /* If we got a vector here, that means the next index will be trying to
4506        * dereference a scalar.
4507        */
4508       vtn_fail_if(glsl_type_is_vector_or_scalar(cur->type),
4509                   "OpCompositeInsert has too many indices.");
4510       vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4511                   "All indices in an OpCompositeInsert must be in-bounds");
4512       cur = cur->elems[indices[i]];
4513    }
4514 
4515    if (glsl_type_is_vector_or_scalar(cur->type)) {
4516       vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4517                   "All indices in an OpCompositeInsert must be in-bounds");
4518 
4519       /* According to the SPIR-V spec, OpCompositeInsert may work down to
4520        * the component granularity. In that case, the last index will be
4521        * the index to insert the scalar into the vector.
4522        */
4523 
4524       cur->def = nir_vector_insert_imm(&b->nb, cur->def, insert->def, indices[i]);
4525    } else {
4526       vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4527                   "All indices in an OpCompositeInsert must be in-bounds");
4528       cur->elems[indices[i]] = insert;
4529    }
4530 
4531    return dest;
4532 }
4533 
4534 static struct vtn_ssa_value *
4535 vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
4536                       const uint32_t *indices, unsigned num_indices)
4537 {
4538    if (glsl_type_is_cmat(src->type))
4539       return vtn_cooperative_matrix_extract(b, src, indices, num_indices);
4540 
4541    struct vtn_ssa_value *cur = src;
4542    for (unsigned i = 0; i < num_indices; i++) {
4543       if (glsl_type_is_vector_or_scalar(cur->type)) {
4544          vtn_assert(i == num_indices - 1);
4545          vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4546                      "All indices in an OpCompositeExtract must be in-bounds");
4547 
4548          /* According to the SPIR-V spec, OpCompositeExtract may work down to
4549           * the component granularity. The last index will be the index of the
4550           * vector to extract.
4551           */
4552 
4553          const struct glsl_type *scalar_type =
4554             glsl_scalar_type(glsl_get_base_type(cur->type));
4555          struct vtn_ssa_value *ret = vtn_create_ssa_value(b, scalar_type);
4556          ret->def = nir_channel(&b->nb, cur->def, indices[i]);
4557          return ret;
4558       } else {
4559          vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4560                      "All indices in an OpCompositeExtract must be in-bounds");
4561          cur = cur->elems[indices[i]];
4562       }
4563    }
4564 
4565    return cur;
4566 }
4567 
4568 static void
4569 vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
4570                      const uint32_t *w, unsigned count)
4571 {
4572    struct vtn_type *type = vtn_get_type(b, w[1]);
4573    struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
4574 
4575    switch (opcode) {
4576    case SpvOpVectorExtractDynamic:
4577       ssa->def = nir_vector_extract(&b->nb, vtn_get_nir_ssa(b, w[3]),
4578                                     vtn_get_nir_ssa(b, w[4]));
4579       break;
4580 
4581    case SpvOpVectorInsertDynamic:
4582       ssa->def = nir_vector_insert(&b->nb, vtn_get_nir_ssa(b, w[3]),
4583                                    vtn_get_nir_ssa(b, w[4]),
4584                                    vtn_get_nir_ssa(b, w[5]));
4585       break;
4586 
4587    case SpvOpVectorShuffle:
4588       ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type),
4589                                     vtn_get_nir_ssa(b, w[3]),
4590                                     vtn_get_nir_ssa(b, w[4]),
4591                                     w + 5);
4592       break;
4593 
4594    case SpvOpCompositeConstruct:
4595    case SpvOpCompositeConstructReplicateEXT: {
4596       unsigned elems = count - 3;
4597       assume(elems >= 1);
4598       if (type->base_type == vtn_base_type_cooperative_matrix) {
4599          vtn_assert(elems == 1);
4600          nir_deref_instr *mat = vtn_create_cmat_temporary(b, type->type, "cmat_construct");
4601          nir_cmat_construct(&b->nb, &mat->def, vtn_get_nir_ssa(b, w[3]));
4602          vtn_set_ssa_value_var(b, ssa, mat->var);
4603       } else if (glsl_type_is_vector_or_scalar(type->type)) {
4604          if (opcode == SpvOpCompositeConstructReplicateEXT) {
4605             nir_def *src = vtn_get_nir_ssa(b, w[3]);
4606             vtn_assert(glsl_get_bit_size(type->type) == src->bit_size);
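                 /* Replicate: the all-zero swizzle broadcasts component 0 of
                  * the source into every channel of the result.
                  */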
4607             unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
4608             ssa->def = nir_swizzle(&b->nb, src, swiz,
4609                                    glsl_get_vector_elements(type->type));
4610          } else {
4611             nir_def *srcs[NIR_MAX_VEC_COMPONENTS];
4612             for (unsigned i = 0; i < elems; i++) {
4613                srcs[i] = vtn_get_nir_ssa(b, w[3 + i]);
4614                vtn_assert(glsl_get_bit_size(type->type) == srcs[i]->bit_size);
4615             }
4616             ssa->def =
4617                vtn_vector_construct(b, glsl_get_vector_elements(type->type),
4618                                     elems, srcs);
4619          }
4620       } else {
4621          ssa->elems = vtn_alloc_array(b, struct vtn_ssa_value *, type->length);
4622          if (opcode == SpvOpCompositeConstructReplicateEXT) {
4623             struct vtn_ssa_value *elem = vtn_ssa_value(b, w[3]);
4624             for (unsigned i = 0; i < type->length; i++)
4625                ssa->elems[i] = elem;
4626          } else {
4627             vtn_fail_if(elems != type->length,
4628                         "%s has %u constituents, expected %u",
4629                         spirv_op_to_string(opcode), elems, type->length);
4630             for (unsigned i = 0; i < elems; i++)
4631                ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
4632          }
4633       }
4634       break;
4635    }
4636    case SpvOpCompositeExtract:
4637       ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
4638                                   w + 4, count - 4);
4639       break;
4640 
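        /* For OpCompositeInsert, w[3] is the Object being inserted and w[4]
         * is the Composite it is inserted into, hence the swapped argument
         * order below.
         */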
4641    case SpvOpCompositeInsert:
4642       ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
4643                                  vtn_ssa_value(b, w[3]),
4644                                  w + 5, count - 5);
4645       break;
4646 
4647    case SpvOpCopyLogical: {
4648       ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
4649       struct vtn_type *dst_type = vtn_get_value_type(b, w[2]);
4650       vtn_assert(vtn_types_compatible(b, type, dst_type));
4651       ssa->type = glsl_get_bare_type(dst_type->type);
4652       break;
4653    }
4654    case SpvOpCopyObject:
4655    case SpvOpExpectKHR:
4656       vtn_copy_value(b, w[3], w[2]);
4657       return;
4658 
4659    default:
4660       vtn_fail_with_opcode("unknown composite operation", opcode);
4661    }
4662 
4663    vtn_push_ssa_value(b, w[2], ssa);
4664 }
4665 
4666 static void
4667 vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
4668                    const uint32_t *w, UNUSED unsigned count)
4669 {
4670    switch (opcode) {
4671    case SpvOpEmitVertex:
4672    case SpvOpEmitStreamVertex:
4673    case SpvOpEndPrimitive:
4674    case SpvOpEndStreamPrimitive: {
4675       unsigned stream = 0;
4676       if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
4677          stream = vtn_constant_uint(b, w[1]);
4678 
4679       switch (opcode) {
4680       case SpvOpEmitStreamVertex:
4681       case SpvOpEmitVertex:
4682          nir_emit_vertex(&b->nb, stream);
4683          break;
4684       case SpvOpEndPrimitive:
4685       case SpvOpEndStreamPrimitive:
4686          nir_end_primitive(&b->nb, stream);
4687          break;
4688       default:
4689          unreachable("Invalid opcode");
4690       }
4691       break;
4692    }
4693 
4694    case SpvOpMemoryBarrier: {
4695       SpvScope scope = vtn_constant_uint(b, w[1]);
4696       SpvMemorySemanticsMask semantics = vtn_constant_uint(b, w[2]);
4697       vtn_emit_memory_barrier(b, scope, semantics);
4698       return;
4699    }
4700 
4701    case SpvOpControlBarrier: {
4702       SpvScope execution_scope = vtn_constant_uint(b, w[1]);
4703       SpvScope memory_scope = vtn_constant_uint(b, w[2]);
4704       SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]);
4705 
4706       /* GLSLang, prior to commit 8297936dd6eb3, emitted OpControlBarrier with
4707        * memory semantics of None for GLSL barrier().
4708        * And before that, prior to c3f1cdfa, emitted the OpControlBarrier with
4709        * Device instead of Workgroup for execution scope.
4710        */
4711       if (b->wa_glslang_cs_barrier &&
4712           b->nb.shader->info.stage == MESA_SHADER_COMPUTE &&
4713           (execution_scope == SpvScopeWorkgroup ||
4714            execution_scope == SpvScopeDevice) &&
4715           memory_semantics == SpvMemorySemanticsMaskNone) {
4716          execution_scope = SpvScopeWorkgroup;
4717          memory_scope = SpvScopeWorkgroup;
4718          memory_semantics = SpvMemorySemanticsAcquireReleaseMask |
4719                             SpvMemorySemanticsWorkgroupMemoryMask;
4720       }
4721 
4722       /* From the SPIR-V spec:
4723        *
4724        *    "When used with the TessellationControl execution model, it also
4725        *    implicitly synchronizes the Output Storage Class: Writes to Output
4726        *    variables performed by any invocation executed prior to a
4727        *    OpControlBarrier will be visible to any other invocation after
4728        *    return from that OpControlBarrier."
4729        *
4730        * The same applies to VK_NV_mesh_shader.
4731        */
4732       if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL ||
4733           b->nb.shader->info.stage == MESA_SHADER_TASK ||
4734           b->nb.shader->info.stage == MESA_SHADER_MESH) {
4735          memory_semantics &= ~(SpvMemorySemanticsAcquireMask |
4736                                SpvMemorySemanticsReleaseMask |
4737                                SpvMemorySemanticsAcquireReleaseMask |
4738                                SpvMemorySemanticsSequentiallyConsistentMask);
4739          memory_semantics |= SpvMemorySemanticsAcquireReleaseMask |
4740                              SpvMemorySemanticsOutputMemoryMask;
4741          if (memory_scope == SpvScopeSubgroup || memory_scope == SpvScopeInvocation)
4742             memory_scope = SpvScopeWorkgroup;
4743       }
4744 
4745       vtn_emit_scoped_control_barrier(b, execution_scope, memory_scope,
4746                                       memory_semantics);
4747       break;
4748    }
4749 
4750    default:
4751       unreachable("unknown barrier instruction");
4752    }
4753 }
4754 
4755 static enum tess_primitive_mode
4756 tess_primitive_mode_from_spv_execution_mode(struct vtn_builder *b,
4757                                             SpvExecutionMode mode)
4758 {
4759    switch (mode) {
4760    case SpvExecutionModeTriangles:
4761       return TESS_PRIMITIVE_TRIANGLES;
4762    case SpvExecutionModeQuads:
4763       return TESS_PRIMITIVE_QUADS;
4764    case SpvExecutionModeIsolines:
4765       return TESS_PRIMITIVE_ISOLINES;
4766    default:
4767       vtn_fail("Invalid tess primitive type: %s (%u)",
4768                spirv_executionmode_to_string(mode), mode);
4769    }
4770 }
4771 
4772 static enum mesa_prim
4773 primitive_from_spv_execution_mode(struct vtn_builder *b,
4774                                   SpvExecutionMode mode)
4775 {
4776    switch (mode) {
4777    case SpvExecutionModeInputPoints:
4778    case SpvExecutionModeOutputPoints:
4779       return MESA_PRIM_POINTS;
4780    case SpvExecutionModeInputLines:
4781    case SpvExecutionModeOutputLinesNV:
4782       return MESA_PRIM_LINES;
4783    case SpvExecutionModeInputLinesAdjacency:
4784       return MESA_PRIM_LINES_ADJACENCY;
4785    case SpvExecutionModeTriangles:
4786    case SpvExecutionModeOutputTrianglesNV:
4787       return MESA_PRIM_TRIANGLES;
4788    case SpvExecutionModeInputTrianglesAdjacency:
4789       return MESA_PRIM_TRIANGLES_ADJACENCY;
4790    case SpvExecutionModeQuads:
4791       return MESA_PRIM_QUADS;
4792    case SpvExecutionModeOutputLineStrip:
4793       return MESA_PRIM_LINE_STRIP;
4794    case SpvExecutionModeOutputTriangleStrip:
4795       return MESA_PRIM_TRIANGLE_STRIP;
4796    default:
4797       vtn_fail("Invalid primitive type: %s (%u)",
4798                spirv_executionmode_to_string(mode), mode);
4799    }
4800 }
4801 
4802 static unsigned
4803 vertices_in_from_spv_execution_mode(struct vtn_builder *b,
4804                                     SpvExecutionMode mode)
4805 {
4806    switch (mode) {
4807    case SpvExecutionModeInputPoints:
4808       return 1;
4809    case SpvExecutionModeInputLines:
4810       return 2;
4811    case SpvExecutionModeInputLinesAdjacency:
4812       return 4;
4813    case SpvExecutionModeTriangles:
4814       return 3;
4815    case SpvExecutionModeInputTrianglesAdjacency:
4816       return 6;
4817    default:
4818       vtn_fail("Invalid GS input mode: %s (%u)",
4819                spirv_executionmode_to_string(mode), mode);
4820    }
4821 }
4822 
4823 gl_shader_stage
4824 vtn_stage_for_execution_model(SpvExecutionModel model)
4825 {
4826    switch (model) {
4827    case SpvExecutionModelVertex:
4828       return MESA_SHADER_VERTEX;
4829    case SpvExecutionModelTessellationControl:
4830       return MESA_SHADER_TESS_CTRL;
4831    case SpvExecutionModelTessellationEvaluation:
4832       return MESA_SHADER_TESS_EVAL;
4833    case SpvExecutionModelGeometry:
4834       return MESA_SHADER_GEOMETRY;
4835    case SpvExecutionModelFragment:
4836       return MESA_SHADER_FRAGMENT;
4837    case SpvExecutionModelGLCompute:
4838       return MESA_SHADER_COMPUTE;
4839    case SpvExecutionModelKernel:
4840       return MESA_SHADER_KERNEL;
4841    case SpvExecutionModelRayGenerationKHR:
4842       return MESA_SHADER_RAYGEN;
4843    case SpvExecutionModelAnyHitKHR:
4844       return MESA_SHADER_ANY_HIT;
4845    case SpvExecutionModelClosestHitKHR:
4846       return MESA_SHADER_CLOSEST_HIT;
4847    case SpvExecutionModelMissKHR:
4848       return MESA_SHADER_MISS;
4849    case SpvExecutionModelIntersectionKHR:
4850       return MESA_SHADER_INTERSECTION;
4851    case SpvExecutionModelCallableKHR:
4852        return MESA_SHADER_CALLABLE;
4853    case SpvExecutionModelTaskNV:
4854    case SpvExecutionModelTaskEXT:
4855       return MESA_SHADER_TASK;
4856    case SpvExecutionModelMeshNV:
4857    case SpvExecutionModelMeshEXT:
4858       return MESA_SHADER_MESH;
4859    default:
4860       return MESA_SHADER_NONE;
4861    }
4862 }
4863 
4864 void
4865 vtn_handle_entry_point(struct vtn_builder *b, const uint32_t *w,
4866                        unsigned count)
4867 {
4868    struct vtn_value *entry_point = &b->values[w[2]];
4869    /* Record the name so it can serve as a label even if this is not the selected entry point. */
4870    unsigned name_words;
4871    entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
4872    entry_point->is_entrypoint = true;
4873 
4874    gl_shader_stage stage = vtn_stage_for_execution_model(w[1]);
4875    vtn_fail_if(stage == MESA_SHADER_NONE,
4876                "Unsupported execution model: %s (%u)",
4877                spirv_executionmodel_to_string(w[1]), w[1]);
4878    if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
4879        stage != b->entry_point_stage)
4880       return;
4881 
4882    vtn_assert(b->entry_point == NULL);
4883    b->entry_point = entry_point;
4884 
4885    /* Entry points enumerate which global variables are used. */
4886    size_t start = 3 + name_words;
4887    b->interface_ids_count = count - start;
4888    b->interface_ids = vtn_alloc_array(b, uint32_t, b->interface_ids_count);
4889    memcpy(b->interface_ids, &w[start], b->interface_ids_count * 4);
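        /* Keep the interface IDs sorted so that later membership checks can
         * use a binary search.
         */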
4890    qsort(b->interface_ids, b->interface_ids_count, 4, cmp_uint32_t);
4891 }
4892 
4893 static bool
4894 vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
4895                                 const uint32_t *w, unsigned count)
4896 {
4897    switch (opcode) {
4898    case SpvOpString:
4899    case SpvOpSource:
4900    case SpvOpSourceExtension:
4901    case SpvOpSourceContinued:
4902    case SpvOpModuleProcessed:
4903       vtn_handle_debug_text(b, opcode, w, count);
4904       break;
4905 
4906    case SpvOpExtension: {
4907       /* Implementing both NV_mesh_shader and EXT_mesh_shader
4908        * is difficult without knowing which we're dealing with.
4909        * TODO: remove this when we stop supporting NV_mesh_shader.
4910        */
4911       const char *ext_name = (const char *)&w[1];
4912       if (strcmp(ext_name, "SPV_NV_mesh_shader") == 0)
4913          b->shader->info.mesh.nv = true;
4914       break;
4915    }
4916 
4917    case SpvOpCapability: {
4918       SpvCapability cap = w[1];
4919       switch (cap) {
4920       case SpvCapabilitySubgroupDispatch:
4921          /* Missing :
4922           *   - SpvOpGetKernelLocalSizeForSubgroupCount
4923           *   - SpvOpGetKernelMaxNumSubgroups
4924           */
4925          vtn_warn("Not fully supported capability: %s",
4926                   spirv_capability_to_string(cap));
4927          break;
4928 
4929       case SpvCapabilityOptNoneEXT:
4930          /* This is a "strong request" not to optimize a function, usually
4931           * because it's a compute shader whose workgroup size, etc., has been
4932           * manually tuned and we shouldn't risk undoing it. Someday!
4933           */
4934          vtn_info("Not fully supported capability: %s",
4935                   spirv_capability_to_string(cap));
4936          break;
4937 
4938       default:
4939          vtn_fail_if(!spirv_capabilities_get(&implemented_capabilities, cap),
4940                      "Unimplemented SPIR-V capability: %s (%u)",
4941                      spirv_capability_to_string(cap), cap);
4942       }
4943 
4944       if (!spirv_capabilities_get(&b->supported_capabilities, cap)) {
4945          vtn_warn("Unsupported SPIR-V capability: %s (%u)",
4946                   spirv_capability_to_string(cap), cap);
4947       }
4948 
4949       spirv_capabilities_set(&b->enabled_capabilities, cap, true);
4950       break;
4951    }
4952 
4953    case SpvOpExtInstImport:
4954       vtn_handle_extension(b, opcode, w, count);
4955       break;
4956 
4957    case SpvOpMemoryModel:
4958       switch (w[1]) {
4959       case SpvAddressingModelPhysical32:
4960          vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
4961                      "AddressingModelPhysical32 only supported for kernels");
4962          b->shader->info.cs.ptr_size = 32;
4963          b->physical_ptrs = true;
4964          assert(nir_address_format_bit_size(b->options->global_addr_format) == 32);
4965          assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
4966          assert(nir_address_format_bit_size(b->options->shared_addr_format) == 32);
4967          assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
4968          assert(nir_address_format_bit_size(b->options->constant_addr_format) == 32);
4969          assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
4970          break;
4971       case SpvAddressingModelPhysical64:
4972          vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
4973                      "AddressingModelPhysical64 only supported for kernels");
4974          b->shader->info.cs.ptr_size = 64;
4975          b->physical_ptrs = true;
4976          assert(nir_address_format_bit_size(b->options->global_addr_format) == 64);
4977          assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
4978          assert(nir_address_format_bit_size(b->options->shared_addr_format) == 64);
4979          assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
4980          assert(nir_address_format_bit_size(b->options->constant_addr_format) == 64);
4981          assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
4982          break;
4983       case SpvAddressingModelLogical:
4984          vtn_fail_if(b->shader->info.stage == MESA_SHADER_KERNEL,
4985                      "AddressingModelLogical only supported for shaders");
4986          b->physical_ptrs = false;
4987          break;
4988       case SpvAddressingModelPhysicalStorageBuffer64:
4989          vtn_fail_if(!b->supported_capabilities.PhysicalStorageBufferAddresses,
4990                      "AddressingModelPhysicalStorageBuffer64 not supported");
4991          break;
4992       default:
4993          vtn_fail("Unknown addressing model: %s (%u)",
4994                   spirv_addressingmodel_to_string(w[1]), w[1]);
4995          break;
4996       }
4997 
4998       b->mem_model = w[2];
4999       switch (w[2]) {
5000       case SpvMemoryModelSimple:
5001       case SpvMemoryModelGLSL450:
5002       case SpvMemoryModelOpenCL:
5003          break;
5004       case SpvMemoryModelVulkan:
5005          vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
5006                      "Vulkan memory model is unsupported by this driver");
5007          break;
5008       default:
5009          vtn_fail("Unsupported memory model: %s",
5010                   spirv_memorymodel_to_string(w[2]));
5011          break;
5012       }
5013       break;
5014 
5015    case SpvOpEntryPoint:
5016       vtn_handle_entry_point(b, w, count);
5017       break;
5018 
5019    case SpvOpName:
5020       b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
5021       break;
5022 
5023    case SpvOpMemberName:
5024    case SpvOpExecutionMode:
5025    case SpvOpExecutionModeId:
5026    case SpvOpDecorationGroup:
5027    case SpvOpDecorate:
5028    case SpvOpDecorateId:
5029    case SpvOpMemberDecorate:
5030    case SpvOpGroupDecorate:
5031    case SpvOpGroupMemberDecorate:
5032    case SpvOpDecorateString:
5033    case SpvOpMemberDecorateString:
5034       vtn_handle_decoration(b, opcode, w, count);
5035       break;
5036 
5037    case SpvOpExtInst:
5038    case SpvOpExtInstWithForwardRefsKHR: {
5039       struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5040       if (val->ext_handler == vtn_handle_non_semantic_instruction) {
5041          /* NonSemantic extended instructions are acceptable in preamble. */
5042          vtn_handle_non_semantic_instruction(b, w[4], w, count);
5043          return true;
5044       } else {
5045          return false; /* End of preamble. */
5046       }
5047    }
5048 
5049    default:
5050       return false; /* End of preamble */
5051    }
5052 
5053    return true;
5054 }
5055 
5056 void
5057 vtn_handle_debug_text(struct vtn_builder *b, SpvOp opcode,
5058                       const uint32_t *w, unsigned count)
5059 {
5060    switch (opcode) {
5061    case SpvOpString:
5062       vtn_push_value(b, w[1], vtn_value_type_string)->str =
5063          vtn_string_literal(b, &w[2], count - 2, NULL);
5064       break;
5065 
5066    case SpvOpSource: {
5067       const char *lang;
5068       switch (w[1]) {
5069       default:
5070       case SpvSourceLanguageUnknown:      lang = "unknown";    break;
5071       case SpvSourceLanguageESSL:         lang = "ESSL";       break;
5072       case SpvSourceLanguageGLSL:         lang = "GLSL";       break;
5073       case SpvSourceLanguageOpenCL_C:     lang = "OpenCL C";   break;
5074       case SpvSourceLanguageOpenCL_CPP:   lang = "OpenCL C++"; break;
5075       case SpvSourceLanguageHLSL:         lang = "HLSL";       break;
5076       }
5077 
5078       uint32_t version = w[2];
5079 
5080       const char *file =
5081          (count > 3) ? vtn_value(b, w[3], vtn_value_type_string)->str : "";
5082 
5083       vtn_info("Parsing SPIR-V from %s %u source file %s", lang, version, file);
5084 
5085       b->source_lang = w[1];
5086       break;
5087    }
5088 
5089    case SpvOpSourceExtension:
5090    case SpvOpSourceContinued:
5091    case SpvOpModuleProcessed:
5092       /* Unhandled, but these are for debug so that's ok. */
5093       break;
5094 
5095    default:
5096       unreachable("Unhandled opcode");
5097    }
5098 }
5099 
5100 static void
5101 vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
5102                           const struct vtn_decoration *mode, UNUSED void *data)
5103 {
5104    vtn_assert(b->entry_point == entry_point);
5105 
5106    switch (mode->exec_mode) {
5107    case SpvExecutionModeOriginUpperLeft:
5108    case SpvExecutionModeOriginLowerLeft:
5109       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5110       b->shader->info.fs.origin_upper_left =
5111          (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
5112       break;
5113 
5114    case SpvExecutionModeEarlyFragmentTests:
5115       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5116       b->shader->info.fs.early_fragment_tests = true;
5117       break;
5118 
5119    case SpvExecutionModePostDepthCoverage:
5120       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5121       b->shader->info.fs.post_depth_coverage = true;
5122       break;
5123 
5124    case SpvExecutionModeInvocations:
5125       vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5126       b->shader->info.gs.invocations = MAX2(1, mode->operands[0]);
5127       break;
5128 
5129    case SpvExecutionModeDepthReplacing:
5130       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5131       if (b->shader->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE)
5132          b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
5133       break;
5134    case SpvExecutionModeDepthGreater:
5135       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5136       b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
5137       break;
5138    case SpvExecutionModeDepthLess:
5139       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5140       b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
5141       break;
5142    case SpvExecutionModeDepthUnchanged:
5143       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5144       b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
5145       break;
5146 
5147    case SpvExecutionModeLocalSizeHint:
5148       vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5149       b->shader->info.cs.workgroup_size_hint[0] = mode->operands[0];
5150       b->shader->info.cs.workgroup_size_hint[1] = mode->operands[1];
5151       b->shader->info.cs.workgroup_size_hint[2] = mode->operands[2];
5152       break;
5153 
5154    case SpvExecutionModeLocalSize:
5155       if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5156          b->shader->info.workgroup_size[0] = mode->operands[0];
5157          b->shader->info.workgroup_size[1] = mode->operands[1];
5158          b->shader->info.workgroup_size[2] = mode->operands[2];
5159       } else {
5160          vtn_fail("Execution mode LocalSize not supported in stage %s",
5161                   _mesa_shader_stage_to_string(b->shader->info.stage));
5162       }
5163       break;
5164 
5165    case SpvExecutionModeOutputVertices:
5166       switch (b->shader->info.stage) {
5167       case MESA_SHADER_TESS_CTRL:
5168       case MESA_SHADER_TESS_EVAL:
5169          b->shader->info.tess.tcs_vertices_out = mode->operands[0];
5170          break;
5171       case MESA_SHADER_GEOMETRY:
5172          b->shader->info.gs.vertices_out = mode->operands[0];
5173          break;
5174       case MESA_SHADER_MESH:
5175          b->shader->info.mesh.max_vertices_out = mode->operands[0];
5176          break;
5177       default:
5178          vtn_fail("Execution mode OutputVertices not supported in stage %s",
5179                   _mesa_shader_stage_to_string(b->shader->info.stage));
5180          break;
5181       }
5182       break;
5183 
5184    case SpvExecutionModeInputPoints:
5185    case SpvExecutionModeInputLines:
5186    case SpvExecutionModeInputLinesAdjacency:
5187    case SpvExecutionModeTriangles:
5188    case SpvExecutionModeInputTrianglesAdjacency:
5189    case SpvExecutionModeQuads:
5190    case SpvExecutionModeIsolines:
5191       if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5192           b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
5193          b->shader->info.tess._primitive_mode =
5194             tess_primitive_mode_from_spv_execution_mode(b, mode->exec_mode);
5195       } else {
5196          vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5197          b->shader->info.gs.vertices_in =
5198             vertices_in_from_spv_execution_mode(b, mode->exec_mode);
5199          b->shader->info.gs.input_primitive =
5200             primitive_from_spv_execution_mode(b, mode->exec_mode);
5201       }
5202       break;
5203 
5204    case SpvExecutionModeOutputPrimitivesNV:
5205       vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5206       b->shader->info.mesh.max_primitives_out = mode->operands[0];
5207       break;
5208 
5209    case SpvExecutionModeOutputLinesNV:
5210    case SpvExecutionModeOutputTrianglesNV:
5211       vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5212       b->shader->info.mesh.primitive_type =
5213          primitive_from_spv_execution_mode(b, mode->exec_mode);
5214       break;
5215 
5216    case SpvExecutionModeOutputPoints: {
5217       const unsigned primitive =
5218          primitive_from_spv_execution_mode(b, mode->exec_mode);
5219 
5220       switch (b->shader->info.stage) {
5221       case MESA_SHADER_GEOMETRY:
5222          b->shader->info.gs.output_primitive = primitive;
5223          break;
5224       case MESA_SHADER_MESH:
5225          b->shader->info.mesh.primitive_type = primitive;
5226          break;
5227       default:
5228          vtn_fail("Execution mode OutputPoints not supported in stage %s",
5229                   _mesa_shader_stage_to_string(b->shader->info.stage));
5230          break;
5231       }
5232       break;
5233    }
5234 
5235    case SpvExecutionModeOutputLineStrip:
5236    case SpvExecutionModeOutputTriangleStrip:
5237       vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5238       b->shader->info.gs.output_primitive =
5239          primitive_from_spv_execution_mode(b, mode->exec_mode);
5240       break;
5241 
5242    case SpvExecutionModeSpacingEqual:
5243       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5244                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5245       b->shader->info.tess.spacing = TESS_SPACING_EQUAL;
5246       break;
5247    case SpvExecutionModeSpacingFractionalEven:
5248       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5249                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5250       b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_EVEN;
5251       break;
5252    case SpvExecutionModeSpacingFractionalOdd:
5253       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5254                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5255       b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_ODD;
5256       break;
5257    case SpvExecutionModeVertexOrderCw:
5258       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5259                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5260       b->shader->info.tess.ccw = false;
5261       break;
5262    case SpvExecutionModeVertexOrderCcw:
5263       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5264                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5265       b->shader->info.tess.ccw = true;
5266       break;
5267    case SpvExecutionModePointMode:
5268       vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5269                  b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5270       b->shader->info.tess.point_mode = true;
5271       break;
5272 
5273    case SpvExecutionModePixelCenterInteger:
5274       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5275       b->shader->info.fs.pixel_center_integer = true;
5276       break;
5277 
5278    case SpvExecutionModeXfb:
5279       b->shader->info.has_transform_feedback_varyings = true;
5280       break;
5281 
5282    case SpvExecutionModeVecTypeHint:
5283       break; /* OpenCL */
5284 
5285    case SpvExecutionModeContractionOff:
5286       if (b->shader->info.stage != MESA_SHADER_KERNEL)
5287          vtn_warn("ExecutionMode only allowed for CL-style kernels: %s",
5288                   spirv_executionmode_to_string(mode->exec_mode));
5289       else
5290          b->exact = true;
5291       break;
5292 
5293    case SpvExecutionModeStencilRefReplacingEXT:
5294       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5295       break;
5296 
5297    case SpvExecutionModeDerivativeGroupQuadsKHR:
5298       vtn_assert(gl_shader_stage_uses_workgroup(b->shader->info.stage));
5299       b->shader->info.derivative_group = DERIVATIVE_GROUP_QUADS;
5300       break;
5301 
5302    case SpvExecutionModeDerivativeGroupLinearKHR:
5303       vtn_assert(gl_shader_stage_uses_workgroup(b->shader->info.stage));
5304       b->shader->info.derivative_group = DERIVATIVE_GROUP_LINEAR;
5305       break;
5306 
5307    case SpvExecutionModePixelInterlockOrderedEXT:
5308       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5309       b->shader->info.fs.pixel_interlock_ordered = true;
5310       break;
5311 
5312    case SpvExecutionModePixelInterlockUnorderedEXT:
5313       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5314       b->shader->info.fs.pixel_interlock_unordered = true;
5315       break;
5316 
5317    case SpvExecutionModeSampleInterlockOrderedEXT:
5318       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5319       b->shader->info.fs.sample_interlock_ordered = true;
5320       break;
5321 
5322    case SpvExecutionModeSampleInterlockUnorderedEXT:
5323       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5324       b->shader->info.fs.sample_interlock_unordered = true;
5325       break;
5326 
5327    case SpvExecutionModeDenormPreserve:
5328    case SpvExecutionModeDenormFlushToZero:
5329    case SpvExecutionModeSignedZeroInfNanPreserve:
5330    case SpvExecutionModeRoundingModeRTE:
5331    case SpvExecutionModeRoundingModeRTZ: {
5332       unsigned execution_mode = 0;
5333       switch (mode->exec_mode) {
5334       case SpvExecutionModeDenormPreserve:
5335          switch (mode->operands[0]) {
5336          case 16: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP16; break;
5337          case 32: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP32; break;
5338          case 64: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP64; break;
5339          default: vtn_fail("Floating point type not supported");
5340          }
5341          break;
5342       case SpvExecutionModeDenormFlushToZero:
5343          switch (mode->operands[0]) {
5344          case 16: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16; break;
5345          case 32: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32; break;
5346          case 64: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64; break;
5347          default: vtn_fail("Floating point type not supported");
5348          }
5349          break;
5350       case SpvExecutionModeSignedZeroInfNanPreserve:
5351          switch (mode->operands[0]) {
5352          case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break;
5353          case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break;
5354          case 64: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break;
5355          default: vtn_fail("Floating point type not supported");
5356          }
5357          break;
5358       case SpvExecutionModeRoundingModeRTE:
5359          switch (mode->operands[0]) {
5360          case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16; break;
5361          case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32; break;
5362          case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64; break;
5363          default: vtn_fail("Floating point type not supported");
5364          }
5365          break;
5366       case SpvExecutionModeRoundingModeRTZ:
5367          switch (mode->operands[0]) {
5368          case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16; break;
5369          case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32; break;
5370          case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; break;
5371          default: vtn_fail("Floating point type not supported");
5372          }
5373          break;
5374       default:
5375          break;
5376       }
5377 
5378       b->shader->info.float_controls_execution_mode |= execution_mode;
5379 
5380       for (unsigned bit_size = 16; bit_size <= 64; bit_size *= 2) {
5381          vtn_fail_if(nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, bit_size) &&
5382                      nir_is_denorm_preserve(b->shader->info.float_controls_execution_mode, bit_size),
5383                      "Cannot flush to zero and preserve denorms for the same bit size.");
5384          vtn_fail_if(nir_is_rounding_mode_rtne(b->shader->info.float_controls_execution_mode, bit_size) &&
5385                      nir_is_rounding_mode_rtz(b->shader->info.float_controls_execution_mode, bit_size),
5386                      "Cannot set rounding mode to RTNE and RTZ for the same bit size.");
5387       }
5388       break;
5389    }
5390 
5391    case SpvExecutionModeMaximallyReconvergesKHR:
5392       b->shader->info.maximally_reconverges = true;
5393       break;
5394 
5395    case SpvExecutionModeLocalSizeId:
5396    case SpvExecutionModeLocalSizeHintId:
5397    case SpvExecutionModeSubgroupsPerWorkgroupId:
5398    case SpvExecutionModeFPFastMathDefault:
5399    case SpvExecutionModeMaxNodeRecursionAMDX:
5400    case SpvExecutionModeStaticNumWorkgroupsAMDX:
5401    case SpvExecutionModeMaxNumWorkgroupsAMDX:
5402    case SpvExecutionModeShaderIndexAMDX:
5403       /* Handled later by vtn_handle_execution_mode_id(). */
5404       break;
5405 
5406    case SpvExecutionModeSubgroupSize:
5407       vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5408       vtn_assert(b->shader->info.subgroup_size == SUBGROUP_SIZE_VARYING);
5409       b->shader->info.subgroup_size = mode->operands[0];
5410       break;
5411 
5412    case SpvExecutionModeSubgroupsPerWorkgroup:
5413       vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5414       b->shader->info.num_subgroups = mode->operands[0];
5415       break;
5416 
5417    case SpvExecutionModeSubgroupUniformControlFlowKHR:
5418       /* Nothing to do here */
5419       break;
5420 
5421    case SpvExecutionModeEarlyAndLateFragmentTestsAMD:
5422       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5423       b->shader->info.fs.early_and_late_fragment_tests = true;
5424       break;
5425 
5426    case SpvExecutionModeStencilRefGreaterFrontAMD:
5427       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5428       b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_GREATER;
5429       break;
5430 
5431    case SpvExecutionModeStencilRefLessFrontAMD:
5432       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5433       b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_LESS;
5434       break;
5435 
5436    case SpvExecutionModeStencilRefUnchangedFrontAMD:
5437       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5438       b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_UNCHANGED;
5439       break;
5440 
5441    case SpvExecutionModeStencilRefGreaterBackAMD:
5442       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5443       b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_GREATER;
5444       break;
5445 
5446    case SpvExecutionModeStencilRefLessBackAMD:
5447       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5448       b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_LESS;
5449       break;
5450 
5451    case SpvExecutionModeStencilRefUnchangedBackAMD:
5452       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5453       b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_UNCHANGED;
5454       break;
5455 
5456    case SpvExecutionModeRequireFullQuadsKHR:
5457       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5458       b->shader->info.fs.require_full_quads = true;
5459       break;
5460 
5461    case SpvExecutionModeQuadDerivativesKHR:
5462       vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5463       b->shader->info.fs.quad_derivatives = true;
5464       break;
5465 
5466    case SpvExecutionModeCoalescingAMDX:
5467       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5468       b->shader->info.cs.workgroup_count[0] = 1;
5469       b->shader->info.cs.workgroup_count[1] = 1;
5470       b->shader->info.cs.workgroup_count[2] = 1;
5471       break;
5472 
5473    default:
5474       vtn_fail("Unhandled execution mode: %s (%u)",
5475                spirv_executionmode_to_string(mode->exec_mode),
5476                mode->exec_mode);
5477    }
5478 }
5479 
5480 static void
5481 vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_point,
5482                              const struct vtn_decoration *mode, UNUSED void *data)
5483 {
5484 
5485    vtn_assert(b->entry_point == entry_point);
5486 
5487    switch (mode->exec_mode) {
5488    case SpvExecutionModeLocalSizeId:
5489       if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5490          b->shader->info.workgroup_size[0] = vtn_constant_uint(b, mode->operands[0]);
5491          b->shader->info.workgroup_size[1] = vtn_constant_uint(b, mode->operands[1]);
5492          b->shader->info.workgroup_size[2] = vtn_constant_uint(b, mode->operands[2]);
5493       } else {
5494          vtn_fail("Execution mode LocalSizeId not supported in stage %s",
5495                   _mesa_shader_stage_to_string(b->shader->info.stage));
5496       }
5497       break;
5498 
5499    case SpvExecutionModeLocalSizeHintId:
5500       vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5501       b->shader->info.cs.workgroup_size_hint[0] = vtn_constant_uint(b, mode->operands[0]);
5502       b->shader->info.cs.workgroup_size_hint[1] = vtn_constant_uint(b, mode->operands[1]);
5503       b->shader->info.cs.workgroup_size_hint[2] = vtn_constant_uint(b, mode->operands[2]);
5504       break;
5505 
5506    case SpvExecutionModeSubgroupsPerWorkgroupId:
5507       vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5508       b->shader->info.num_subgroups = vtn_constant_uint(b, mode->operands[0]);
5509       break;
5510 
5511    case SpvExecutionModeFPFastMathDefault: {
5512       struct vtn_type *type = vtn_get_type(b, mode->operands[0]);
5513       SpvFPFastMathModeMask flags = vtn_constant_uint(b, mode->operands[1]);
5514 
5515       SpvFPFastMathModeMask can_fast_math =
5516          SpvFPFastMathModeAllowRecipMask |
5517          SpvFPFastMathModeAllowContractMask |
5518          SpvFPFastMathModeAllowReassocMask |
5519          SpvFPFastMathModeAllowTransformMask;
5520       if ((flags & can_fast_math) != can_fast_math)
5521          b->exact = true;
5522 
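      /* Any of NotNaN / NotInf / NSZ that the module does *not* promise maps to
       * the corresponding FLOAT_CONTROLS_*_PRESERVE bit for this type's bit size.
       */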
5523       unsigned execution_mode = 0;
5524       if (!(flags & SpvFPFastMathModeNotNaNMask)) {
5525          switch (glsl_get_bit_size(type->type)) {
5526          case 16: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP16; break;
5527          case 32: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP32; break;
5528          case 64: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP64; break;
5529          }
5530       }
5531       if (!(flags & SpvFPFastMathModeNotInfMask)) {
5532          switch (glsl_get_bit_size(type->type)) {
5533          case 16: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP16; break;
5534          case 32: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP32; break;
5535          case 64: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP64; break;
5536          }
5537       }
5538       if (!(flags & SpvFPFastMathModeNSZMask)) {
5539          switch (glsl_get_bit_size(type->type)) {
5540          case 16: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16; break;
5541          case 32: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32; break;
5542          case 64: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64; break;
5543          }
5544       }
5545       b->shader->info.float_controls_execution_mode |= execution_mode;
5546       break;
5547    }
5548 
5549    case SpvExecutionModeMaxNodeRecursionAMDX:
5550       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5551       break;
5552 
5553    case SpvExecutionModeStaticNumWorkgroupsAMDX:
5554       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5555       b->shader->info.cs.workgroup_count[0] = vtn_constant_uint(b, mode->operands[0]);
5556       b->shader->info.cs.workgroup_count[1] = vtn_constant_uint(b, mode->operands[1]);
5557       b->shader->info.cs.workgroup_count[2] = vtn_constant_uint(b, mode->operands[2]);
5558       assert(b->shader->info.cs.workgroup_count[0]);
5559       assert(b->shader->info.cs.workgroup_count[1]);
5560       assert(b->shader->info.cs.workgroup_count[2]);
5561       break;
5562 
5563    case SpvExecutionModeMaxNumWorkgroupsAMDX:
5564       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5565       break;
5566 
5567    case SpvExecutionModeShaderIndexAMDX:
5568       vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5569       b->shader->info.cs.shader_index = vtn_constant_uint(b, mode->operands[0]);
5570       break;
5571 
5572    default:
5573       /* Nothing to do.  Literal execution modes already handled by
5574        * vtn_handle_execution_mode(). */
5575       break;
5576    }
5577 }
5578 
5579 static bool
5580 vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
5581                                         const uint32_t *w, unsigned count)
5582 {
5583    vtn_set_instruction_result_type(b, opcode, w, count);
5584 
5585    switch (opcode) {
5586    case SpvOpSource:
5587    case SpvOpSourceContinued:
5588    case SpvOpSourceExtension:
5589    case SpvOpExtension:
5590    case SpvOpCapability:
5591    case SpvOpExtInstImport:
5592    case SpvOpMemoryModel:
5593    case SpvOpEntryPoint:
5594    case SpvOpExecutionMode:
5595    case SpvOpString:
5596    case SpvOpName:
5597    case SpvOpMemberName:
5598    case SpvOpDecorationGroup:
5599    case SpvOpDecorate:
5600    case SpvOpDecorateId:
5601    case SpvOpMemberDecorate:
5602    case SpvOpGroupDecorate:
5603    case SpvOpGroupMemberDecorate:
5604    case SpvOpDecorateString:
5605    case SpvOpMemberDecorateString:
5606       vtn_fail("Invalid opcode types and variables section");
5607       break;
5608 
5609    case SpvOpTypeVoid:
5610    case SpvOpTypeBool:
5611    case SpvOpTypeInt:
5612    case SpvOpTypeFloat:
5613    case SpvOpTypeVector:
5614    case SpvOpTypeMatrix:
5615    case SpvOpTypeImage:
5616    case SpvOpTypeSampler:
5617    case SpvOpTypeSampledImage:
5618    case SpvOpTypeArray:
5619    case SpvOpTypeRuntimeArray:
5620    case SpvOpTypeStruct:
5621    case SpvOpTypeOpaque:
5622    case SpvOpTypePointer:
5623    case SpvOpTypeForwardPointer:
5624    case SpvOpTypeFunction:
5625    case SpvOpTypeEvent:
5626    case SpvOpTypeDeviceEvent:
5627    case SpvOpTypeReserveId:
5628    case SpvOpTypeQueue:
5629    case SpvOpTypePipe:
5630    case SpvOpTypeAccelerationStructureKHR:
5631    case SpvOpTypeRayQueryKHR:
5632    case SpvOpTypeCooperativeMatrixKHR:
5633       vtn_handle_type(b, opcode, w, count);
5634       break;
5635 
5636    case SpvOpConstantTrue:
5637    case SpvOpConstantFalse:
5638    case SpvOpConstant:
5639    case SpvOpConstantComposite:
5640    case SpvOpConstantCompositeReplicateEXT:
5641    case SpvOpConstantNull:
5642    case SpvOpSpecConstantTrue:
5643    case SpvOpSpecConstantFalse:
5644    case SpvOpSpecConstant:
5645    case SpvOpSpecConstantComposite:
5646    case SpvOpSpecConstantCompositeReplicateEXT:
5647    case SpvOpSpecConstantOp:
5648       vtn_handle_constant(b, opcode, w, count);
5649       break;
5650 
5651    case SpvOpUndef:
5652    case SpvOpVariable:
5653    case SpvOpConstantSampler:
5654       vtn_handle_variables(b, opcode, w, count);
5655       break;
5656 
5657    case SpvOpExtInst:
5658    case SpvOpExtInstWithForwardRefsKHR: {
5659       struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5660       /* NonSemantic extended instructions are acceptable in the preamble;
5661        * any other extended instruction indicates the end of the preamble.
5662        */
5663       return val->ext_handler == vtn_handle_non_semantic_instruction;
5664    }
5665 
5666    default:
5667       return false; /* End of preamble */
5668    }
5669 
5670    return true;
5671 }
5672 
5673 static struct vtn_ssa_value *
5674 vtn_nir_select(struct vtn_builder *b, struct vtn_ssa_value *src0,
5675                struct vtn_ssa_value *src1, struct vtn_ssa_value *src2)
5676 {
5677    struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
5678    dest->type = src1->type;
5679 
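   /* Variable-backed operands cannot go through nir_bcsel, so select by
    * storing whichever source is chosen into a fresh local variable.
    */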
5680    if (src1->is_variable || src2->is_variable) {
5681       vtn_assert(src1->is_variable && src2->is_variable);
5682 
5683       nir_variable *dest_var =
5684          nir_local_variable_create(b->nb.impl, dest->type, "var_select");
5685       nir_deref_instr *dest_deref = nir_build_deref_var(&b->nb, dest_var);
5686 
5687       nir_push_if(&b->nb, src0->def);
5688       {
5689          nir_deref_instr *src1_deref = vtn_get_deref_for_ssa_value(b, src1);
5690          vtn_local_store(b, vtn_local_load(b, src1_deref, 0), dest_deref, 0);
5691       }
5692       nir_push_else(&b->nb, NULL);
5693       {
5694          nir_deref_instr *src2_deref = vtn_get_deref_for_ssa_value(b, src2);
5695          vtn_local_store(b, vtn_local_load(b, src2_deref, 0), dest_deref, 0);
5696       }
5697       nir_pop_if(&b->nb, NULL);
5698 
5699       vtn_set_ssa_value_var(b, dest, dest_var);
5700    } else if (glsl_type_is_vector_or_scalar(src1->type)) {
5701       dest->def = nir_bcsel(&b->nb, src0->def, src1->def, src2->def);
5702    } else {
5703       unsigned elems = glsl_get_length(src1->type);
5704 
5705       dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
5706       for (unsigned i = 0; i < elems; i++) {
5707          dest->elems[i] = vtn_nir_select(b, src0,
5708                                          src1->elems[i], src2->elems[i]);
5709       }
5710    }
5711 
5712    return dest;
5713 }
5714 
5715 static void
5716 vtn_handle_select(struct vtn_builder *b, SpvOp opcode,
5717                   const uint32_t *w, unsigned count)
5718 {
5719    /* Handle OpSelect up-front here because it needs to be able to handle
5720     * pointers and not just regular vectors and scalars.
5721     */
5722    struct vtn_value *res_val = vtn_untyped_value(b, w[2]);
5723    struct vtn_value *cond_val = vtn_untyped_value(b, w[3]);
5724    struct vtn_value *obj1_val = vtn_untyped_value(b, w[4]);
5725    struct vtn_value *obj2_val = vtn_untyped_value(b, w[5]);
5726 
5727    vtn_fail_if(obj1_val->type != res_val->type ||
5728                obj2_val->type != res_val->type,
5729                "Object types must match the result type in OpSelect (%%%u = %%%u ? %%%u : %%%u)", w[2], w[3], w[4], w[5]);
5730 
5731    vtn_fail_if((cond_val->type->base_type != vtn_base_type_scalar &&
5732                 cond_val->type->base_type != vtn_base_type_vector) ||
5733                !glsl_type_is_boolean(cond_val->type->type),
5734                "OpSelect must have either a vector of booleans or "
5735                "a boolean as Condition type");
5736 
5737    vtn_fail_if(cond_val->type->base_type == vtn_base_type_vector &&
5738                (res_val->type->base_type != vtn_base_type_vector ||
5739                 res_val->type->length != cond_val->type->length),
5740                "When Condition type in OpSelect is a vector, the Result "
5741                "type must be a vector of the same length");
5742 
5743    switch (res_val->type->base_type) {
5744    case vtn_base_type_scalar:
5745    case vtn_base_type_vector:
5746    case vtn_base_type_matrix:
5747    case vtn_base_type_array:
5748    case vtn_base_type_struct:
5749       /* OK. */
5750       break;
5751    case vtn_base_type_pointer:
5752       /* We need to have actual storage for pointer types. */
5753       vtn_fail_if(res_val->type->type == NULL,
5754                   "Invalid pointer result type for OpSelect");
5755       break;
5756    default:
5757       vtn_fail("Result type of OpSelect must be a scalar, composite, or pointer");
5758    }
5759 
5760    vtn_push_ssa_value(b, w[2],
5761       vtn_nir_select(b, vtn_ssa_value(b, w[3]),
5762                         vtn_ssa_value(b, w[4]),
5763                         vtn_ssa_value(b, w[5])));
5764 }
5765 
5766 static void
5767 vtn_handle_ptr(struct vtn_builder *b, SpvOp opcode,
5768                const uint32_t *w, unsigned count)
5769 {
5770    struct vtn_type *type1 = vtn_get_value_type(b, w[3]);
5771    struct vtn_type *type2 = vtn_get_value_type(b, w[4]);
5772    vtn_fail_if(type1->base_type != vtn_base_type_pointer ||
5773                type2->base_type != vtn_base_type_pointer,
5774                "%s operands must have pointer types",
5775                spirv_op_to_string(opcode));
5776    vtn_fail_if(type1->storage_class != type2->storage_class,
5777                "%s operands must have the same storage class",
5778                spirv_op_to_string(opcode));
5779 
5780    struct vtn_type *vtn_type = vtn_get_type(b, w[1]);
5781    const struct glsl_type *type = vtn_type->type;
5782 
5783    nir_address_format addr_format = vtn_mode_to_address_format(
5784       b, vtn_storage_class_to_mode(b, type1->storage_class, NULL, NULL));
5785 
5786    nir_def *def;
5787 
5788    switch (opcode) {
5789    case SpvOpPtrDiff: {
5790       /* OpPtrDiff returns the difference in number of elements (not byte offset). */
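      /* e.g. two pointers into a uint array that are 16 bytes apart yield 4. */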
5791       unsigned elem_size, elem_align;
5792       glsl_get_natural_size_align_bytes(type1->pointed->type,
5793                                         &elem_size, &elem_align);
5794 
5795       def = nir_build_addr_isub(&b->nb,
5796                                 vtn_get_nir_ssa(b, w[3]),
5797                                 vtn_get_nir_ssa(b, w[4]),
5798                                 addr_format);
5799       def = nir_idiv(&b->nb, def, nir_imm_intN_t(&b->nb, elem_size, def->bit_size));
5800       def = nir_i2iN(&b->nb, def, glsl_get_bit_size(type));
5801       break;
5802    }
5803 
5804    case SpvOpPtrEqual:
5805    case SpvOpPtrNotEqual: {
5806       def = nir_build_addr_ieq(&b->nb,
5807                                vtn_get_nir_ssa(b, w[3]),
5808                                vtn_get_nir_ssa(b, w[4]),
5809                                addr_format);
5810       if (opcode == SpvOpPtrNotEqual)
5811          def = nir_inot(&b->nb, def);
5812       break;
5813    }
5814 
5815    default:
5816       unreachable("Invalid ptr operation");
5817    }
5818 
5819    vtn_push_nir_ssa(b, w[2], def);
5820 }
5821 
5822 static void
5823 vtn_handle_ray_intrinsic(struct vtn_builder *b, SpvOp opcode,
5824                          const uint32_t *w, unsigned count)
5825 {
5826    nir_intrinsic_instr *intrin;
5827 
5828    switch (opcode) {
5829    case SpvOpTraceNV:
5830    case SpvOpTraceRayKHR: {
5831       intrin = nir_intrinsic_instr_create(b->nb.shader,
5832                                           nir_intrinsic_trace_ray);
5833 
5834       /* The sources are in the same order in the NIR intrinsic */
5835       for (unsigned i = 0; i < 10; i++)
5836          intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
5837 
5838       nir_deref_instr *payload;
5839       if (opcode == SpvOpTraceNV)
5840          payload = vtn_get_call_payload_for_location(b, w[11]);
5841       else
5842          payload = vtn_nir_deref(b, w[11]);
5843       intrin->src[10] = nir_src_for_ssa(&payload->def);
5844       nir_builder_instr_insert(&b->nb, &intrin->instr);
5845       break;
5846    }
5847 
5848    case SpvOpReportIntersectionKHR: {
5849       intrin = nir_intrinsic_instr_create(b->nb.shader,
5850                                           nir_intrinsic_report_ray_intersection);
5851       intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
5852       intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
5853       nir_def_init(&intrin->instr, &intrin->def, 1, 1);
5854       nir_builder_instr_insert(&b->nb, &intrin->instr);
5855       vtn_push_nir_ssa(b, w[2], &intrin->def);
5856       break;
5857    }
5858 
5859    case SpvOpIgnoreIntersectionNV:
5860       intrin = nir_intrinsic_instr_create(b->nb.shader,
5861                                           nir_intrinsic_ignore_ray_intersection);
5862       nir_builder_instr_insert(&b->nb, &intrin->instr);
5863       break;
5864 
5865    case SpvOpTerminateRayNV:
5866       intrin = nir_intrinsic_instr_create(b->nb.shader,
5867                                           nir_intrinsic_terminate_ray);
5868       nir_builder_instr_insert(&b->nb, &intrin->instr);
5869       break;
5870 
5871    case SpvOpExecuteCallableNV:
5872    case SpvOpExecuteCallableKHR: {
5873       intrin = nir_intrinsic_instr_create(b->nb.shader,
5874                                           nir_intrinsic_execute_callable);
5875       intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
5876       nir_deref_instr *payload;
5877       if (opcode == SpvOpExecuteCallableNV)
5878          payload = vtn_get_call_payload_for_location(b, w[2]);
5879       else
5880          payload = vtn_nir_deref(b, w[2]);
5881       intrin->src[1] = nir_src_for_ssa(&payload->def);
5882       nir_builder_instr_insert(&b->nb, &intrin->instr);
5883       break;
5884    }
5885 
5886    default:
5887       vtn_fail_with_opcode("Unhandled opcode", opcode);
5888    }
5889 }
5890 
5891 static void
5892 vtn_handle_write_packed_primitive_indices(struct vtn_builder *b, SpvOp opcode,
5893                                           const uint32_t *w, unsigned count)
5894 {
5895    vtn_assert(opcode == SpvOpWritePackedPrimitiveIndices4x8NV);
5896 
5897    /* TODO(mesh): Use or create a primitive that allows the unpacking to
5898     * happen in the backend.  What we have here is functional but too
5899     * blunt.
5900     */
5901 
5902    struct vtn_type *offset_type = vtn_get_value_type(b, w[1]);
5903    vtn_fail_if(offset_type->base_type != vtn_base_type_scalar ||
5904                offset_type->type != glsl_uint_type(),
5905                "Index Offset type of OpWritePackedPrimitiveIndices4x8NV "
5906                "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
5907 
5908    struct vtn_type *packed_type = vtn_get_value_type(b, w[2]);
5909    vtn_fail_if(packed_type->base_type != vtn_base_type_scalar ||
5910                packed_type->type != glsl_uint_type(),
5911                "Packed Indices type of OpWritePackedPrimitiveIndices4x8NV "
5912                "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
5913 
5914    nir_deref_instr *indices = NULL;
5915    nir_foreach_variable_with_modes(var, b->nb.shader, nir_var_shader_out) {
5916       if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) {
5917          indices = nir_build_deref_var(&b->nb, var);
5918          break;
5919       }
5920    }
5921 
5922    /* It may be the case that the variable is not present in the
5923     * entry point interface list.
5924     *
5925     * See https://github.com/KhronosGroup/SPIRV-Registry/issues/104.
5926     */
5927 
5928    if (!indices) {
5929       unsigned vertices_per_prim =
5930          mesa_vertices_per_prim(b->shader->info.mesh.primitive_type);
5931       unsigned max_prim_indices =
5932          vertices_per_prim * b->shader->info.mesh.max_primitives_out;
5933       const struct glsl_type *t =
5934          glsl_array_type(glsl_uint_type(), max_prim_indices, 0);
5935       nir_variable *var =
5936          nir_variable_create(b->shader, nir_var_shader_out, t,
5937                              "gl_PrimitiveIndicesNV");
5938 
5939       var->data.location = VARYING_SLOT_PRIMITIVE_INDICES;
5940       var->data.interpolation = INTERP_MODE_NONE;
5941       indices = nir_build_deref_var(&b->nb, var);
5942    }
5943 
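   /* Each packed word carries four 8-bit indices; unpack it and store each
    * byte, zero-extended to 32 bits, at consecutive offsets.
    */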
5944    nir_def *offset = vtn_get_nir_ssa(b, w[1]);
5945    nir_def *packed = vtn_get_nir_ssa(b, w[2]);
5946    nir_def *unpacked = nir_unpack_bits(&b->nb, packed, 8);
5947    for (int i = 0; i < 4; i++) {
5948       nir_deref_instr *offset_deref =
5949          nir_build_deref_array(&b->nb, indices,
5950                                nir_iadd_imm(&b->nb, offset, i));
5951       nir_def *val = nir_u2u32(&b->nb, nir_channel(&b->nb, unpacked, i));
5952 
5953       nir_store_deref(&b->nb, offset_deref, val, 0x1);
5954    }
5955 }
5956 
5957 struct ray_query_value {
5958    nir_ray_query_value     nir_value;
5959    const struct glsl_type *glsl_type;
5960 };
5961 
5962 static struct ray_query_value
5963 spirv_to_nir_type_ray_query_intrinsic(struct vtn_builder *b,
5964                                       SpvOp opcode)
5965 {
5966    switch (opcode) {
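   /* Map each SpvOpRayQueryGet* opcode to the matching nir_ray_query_value and
    * the GLSL type of its result.
    */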
5967 #define CASE(_spv, _nir, _type) case SpvOpRayQueryGet##_spv:            \
5968       return (struct ray_query_value) { .nir_value = nir_ray_query_value_##_nir, .glsl_type = _type }
5969       CASE(RayTMinKHR,                                            tmin,                                   glsl_floatN_t_type(32));
5970       CASE(RayFlagsKHR,                                           flags,                                  glsl_uint_type());
5971       CASE(WorldRayDirectionKHR,                                  world_ray_direction,                    glsl_vec_type(3));
5972       CASE(WorldRayOriginKHR,                                     world_ray_origin,                       glsl_vec_type(3));
5973       CASE(IntersectionTypeKHR,                                   intersection_type,                      glsl_uint_type());
5974       CASE(IntersectionTKHR,                                      intersection_t,                         glsl_floatN_t_type(32));
5975       CASE(IntersectionInstanceCustomIndexKHR,                    intersection_instance_custom_index,     glsl_int_type());
5976       CASE(IntersectionInstanceIdKHR,                             intersection_instance_id,               glsl_int_type());
5977       CASE(IntersectionInstanceShaderBindingTableRecordOffsetKHR, intersection_instance_sbt_index,        glsl_uint_type());
5978       CASE(IntersectionGeometryIndexKHR,                          intersection_geometry_index,            glsl_int_type());
5979       CASE(IntersectionPrimitiveIndexKHR,                         intersection_primitive_index,           glsl_int_type());
5980       CASE(IntersectionBarycentricsKHR,                           intersection_barycentrics,              glsl_vec_type(2));
5981       CASE(IntersectionFrontFaceKHR,                              intersection_front_face,                glsl_bool_type());
5982       CASE(IntersectionCandidateAABBOpaqueKHR,                    intersection_candidate_aabb_opaque,     glsl_bool_type());
5983       CASE(IntersectionObjectToWorldKHR,                          intersection_object_to_world,           glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
5984       CASE(IntersectionWorldToObjectKHR,                          intersection_world_to_object,           glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
5985       CASE(IntersectionObjectRayOriginKHR,                        intersection_object_ray_origin,         glsl_vec_type(3));
5986       CASE(IntersectionObjectRayDirectionKHR,                     intersection_object_ray_direction,      glsl_vec_type(3));
5987       CASE(IntersectionTriangleVertexPositionsKHR,                intersection_triangle_vertex_positions, glsl_array_type(glsl_vec_type(3), 3,
5988                                                                                                                           glsl_get_explicit_stride(glsl_vec_type(3))));
5989 #undef CASE
5990    default:
5991       vtn_fail_with_opcode("Unhandled opcode", opcode);
5992    }
5993 }
5994 
5995 static void
5996 ray_query_load_intrinsic_create(struct vtn_builder *b, SpvOp opcode,
5997                                 const uint32_t *w, nir_def *src0,
5998                                 bool committed)
5999 {
6000    struct ray_query_value value =
6001       spirv_to_nir_type_ray_query_intrinsic(b, opcode);
6002 
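   /* Matrix and array results (e.g. object-to-world transforms or triangle
    * vertex positions) are loaded one column/element at a time via .column.
    */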
6003    if (glsl_type_is_array_or_matrix(value.glsl_type)) {
6004       const struct glsl_type *elem_type = glsl_get_array_element(value.glsl_type);
6005       const unsigned elems = glsl_get_length(value.glsl_type);
6006 
6007       struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, value.glsl_type);
6008       for (unsigned i = 0; i < elems; i++) {
6009          ssa->elems[i]->def =
6010             nir_rq_load(&b->nb,
6011                         glsl_get_vector_elements(elem_type),
6012                         glsl_get_bit_size(elem_type),
6013                         src0,
6014                         .ray_query_value = value.nir_value,
6015                         .committed = committed,
6016                         .column = i);
6017       }
6018 
6019       vtn_push_ssa_value(b, w[2], ssa);
6020    } else {
6021       assert(glsl_type_is_vector_or_scalar(value.glsl_type));
6022 
6023       vtn_push_nir_ssa(b, w[2],
6024                        nir_rq_load(&b->nb,
6025                                    glsl_get_vector_elements(value.glsl_type),
6026                                    glsl_get_bit_size(value.glsl_type),
6027                                    src0,
6028                                    .ray_query_value = value.nir_value,
6029                                    .committed = committed));
6030    }
6031 }
6032 
6033 static void
6034 vtn_handle_ray_query_intrinsic(struct vtn_builder *b, SpvOp opcode,
6035                                const uint32_t *w, unsigned count)
6036 {
6037    switch (opcode) {
6038    case SpvOpRayQueryInitializeKHR: {
6039       nir_intrinsic_instr *intrin =
6040          nir_intrinsic_instr_create(b->nb.shader,
6041                                     nir_intrinsic_rq_initialize);
6042       /* The sources are in the same order in the NIR intrinsic */
6043       for (unsigned i = 0; i < 8; i++)
6044          intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
6045       nir_builder_instr_insert(&b->nb, &intrin->instr);
6046       break;
6047    }
6048 
6049    case SpvOpRayQueryTerminateKHR:
6050       nir_rq_terminate(&b->nb, vtn_ssa_value(b, w[1])->def);
6051       break;
6052 
6053    case SpvOpRayQueryProceedKHR:
6054       vtn_push_nir_ssa(b, w[2],
6055                        nir_rq_proceed(&b->nb, 1, vtn_ssa_value(b, w[3])->def));
6056       break;
6057 
6058    case SpvOpRayQueryGenerateIntersectionKHR:
6059       nir_rq_generate_intersection(&b->nb,
6060                                    vtn_ssa_value(b, w[1])->def,
6061                                    vtn_ssa_value(b, w[2])->def);
6062       break;
6063 
6064    case SpvOpRayQueryConfirmIntersectionKHR:
6065       nir_rq_confirm_intersection(&b->nb, vtn_ssa_value(b, w[1])->def);
6066       break;
6067 
6068    case SpvOpRayQueryGetIntersectionTKHR:
6069    case SpvOpRayQueryGetIntersectionTypeKHR:
6070    case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6071    case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6072    case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6073    case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6074    case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6075    case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6076    case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6077    case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6078    case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6079    case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6080    case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6081    case SpvOpRayQueryGetIntersectionTriangleVertexPositionsKHR:
6082       ray_query_load_intrinsic_create(b, opcode, w,
6083                                       vtn_ssa_value(b, w[3])->def,
6084                                       vtn_constant_uint(b, w[4]));
6085       break;
6086 
6087    case SpvOpRayQueryGetRayTMinKHR:
6088    case SpvOpRayQueryGetRayFlagsKHR:
6089    case SpvOpRayQueryGetWorldRayDirectionKHR:
6090    case SpvOpRayQueryGetWorldRayOriginKHR:
6091    case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6092       ray_query_load_intrinsic_create(b, opcode, w,
6093                                       vtn_ssa_value(b, w[3])->def,
6094                                       /* Committed value is ignored for these */
6095                                       false);
6096       break;
6097 
6098    default:
6099       vtn_fail_with_opcode("Unhandled opcode", opcode);
6100    }
6101 }
6102 
6103 static void
6104 vtn_handle_allocate_node_payloads(struct vtn_builder *b, SpvOp opcode,
6105                                     const uint32_t *w, unsigned count)
6106 {
6107    vtn_assert(opcode == SpvOpAllocateNodePayloadsAMDX);
6108 
6109    nir_def *payloads = vtn_ssa_value(b, w[1])->def;
6110    mesa_scope scope = vtn_translate_scope(b, vtn_constant_uint(b, w[2]));
6111    nir_def *payload_count = vtn_ssa_value(b, w[3])->def;
6112    nir_def *node_index = vtn_ssa_value(b, w[4])->def;
6113 
6114    nir_initialize_node_payloads(&b->nb, payloads, payload_count, node_index, .execution_scope = scope);
6115 }
6116 
6117 static bool
6118 vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
6119                             const uint32_t *w, unsigned count)
6120 {
6121    if (b->options->debug_info) {
6122       nir_debug_info_instr *instr =
6123          nir_debug_info_instr_create(b->shader, nir_debug_info_src_loc, 0);
6124       instr->src_loc.spirv_offset = b->spirv_offset;
6125       instr->src_loc.source = nir_debug_info_spirv;
6126 
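      /* Intern the filename so every source location for this file reuses a
       * single string def built at the top of the function.
       */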
6127       if (b->file) {
6128          nir_def *filename;
6129          struct hash_entry *he = _mesa_hash_table_search(b->strings, b->file);
6130          if (he) {
6131             filename = he->data;
6132          } else {
6133             nir_builder _b = nir_builder_at(nir_before_cf_list(&b->nb.impl->body));
6134             filename = nir_build_string(&_b, b->file);
6135             _mesa_hash_table_insert(b->strings, b->file, filename);
6136          }
6137 
6138          instr->src_loc.filename = nir_src_for_ssa(filename);
6139          /* Make sure line is at least 1 since 0 is reserved for spirv_offset-only
6140           * source locations.
6141           */
6142          instr->src_loc.line = MAX2(b->line, 1);
6143          instr->src_loc.column = b->col;
6144       }
6145 
6146       nir_builder_instr_insert(&b->nb, &instr->instr);
6147    }
6148 
6149    switch (opcode) {
6150    case SpvOpLabel:
6151       break;
6152 
6153    case SpvOpLoopMerge:
6154    case SpvOpSelectionMerge:
6155       /* This is handled by cfg pre-pass and walk_blocks */
6156       break;
6157 
6158    case SpvOpUndef: {
6159       struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
6160       val->type = vtn_get_type(b, w[1]);
6161       break;
6162    }
6163 
6164    case SpvOpExtInst:
6165    case SpvOpExtInstWithForwardRefsKHR:
6166       vtn_handle_extension(b, opcode, w, count);
6167       break;
6168 
6169    case SpvOpVariable:
6170    case SpvOpLoad:
6171    case SpvOpStore:
6172    case SpvOpCopyMemory:
6173    case SpvOpCopyMemorySized:
6174    case SpvOpAccessChain:
6175    case SpvOpPtrAccessChain:
6176    case SpvOpInBoundsAccessChain:
6177    case SpvOpInBoundsPtrAccessChain:
6178    case SpvOpArrayLength:
6179    case SpvOpConvertPtrToU:
6180    case SpvOpConvertUToPtr:
6181    case SpvOpGenericCastToPtrExplicit:
6182    case SpvOpGenericPtrMemSemantics:
6183    case SpvOpSubgroupBlockReadINTEL:
6184    case SpvOpSubgroupBlockWriteINTEL:
6185    case SpvOpConvertUToAccelerationStructureKHR:
6186       vtn_handle_variables(b, opcode, w, count);
6187       break;
6188 
6189    case SpvOpFunctionCall:
6190       vtn_handle_function_call(b, opcode, w, count);
6191       break;
6192 
6193    case SpvOpSampledImage:
6194    case SpvOpImage:
6195    case SpvOpImageSparseTexelsResident:
6196    case SpvOpImageSampleImplicitLod:
6197    case SpvOpImageSparseSampleImplicitLod:
6198    case SpvOpImageSampleExplicitLod:
6199    case SpvOpImageSparseSampleExplicitLod:
6200    case SpvOpImageSampleDrefImplicitLod:
6201    case SpvOpImageSparseSampleDrefImplicitLod:
6202    case SpvOpImageSampleDrefExplicitLod:
6203    case SpvOpImageSparseSampleDrefExplicitLod:
6204    case SpvOpImageSampleProjImplicitLod:
6205    case SpvOpImageSampleProjExplicitLod:
6206    case SpvOpImageSampleProjDrefImplicitLod:
6207    case SpvOpImageSampleProjDrefExplicitLod:
6208    case SpvOpImageFetch:
6209    case SpvOpImageSparseFetch:
6210    case SpvOpImageGather:
6211    case SpvOpImageSparseGather:
6212    case SpvOpImageDrefGather:
6213    case SpvOpImageSparseDrefGather:
6214    case SpvOpImageQueryLod:
6215       vtn_handle_texture(b, opcode, w, count);
6216       break;
6217 
6218    case SpvOpImageRead:
6219    case SpvOpImageSparseRead:
6220    case SpvOpImageWrite:
6221    case SpvOpImageTexelPointer:
6222    case SpvOpImageQueryFormat:
6223    case SpvOpImageQueryOrder:
6224       vtn_handle_image(b, opcode, w, count);
6225       break;
6226 
6227    case SpvOpImageQueryLevels:
6228    case SpvOpImageQuerySamples:
6229    case SpvOpImageQuerySizeLod:
6230    case SpvOpImageQuerySize: {
6231       struct vtn_type *image_type = vtn_get_value_type(b, w[3]);
6232       vtn_assert(image_type->base_type == vtn_base_type_image);
6233       if (glsl_type_is_image(image_type->glsl_image)) {
6234          vtn_handle_image(b, opcode, w, count);
6235       } else {
6236          vtn_assert(glsl_type_is_texture(image_type->glsl_image));
6237          vtn_handle_texture(b, opcode, w, count);
6238       }
6239       break;
6240    }
6241 
6242    case SpvOpFragmentMaskFetchAMD:
6243    case SpvOpFragmentFetchAMD:
6244       vtn_handle_texture(b, opcode, w, count);
6245       break;
6246 
6247    case SpvOpAtomicLoad:
6248    case SpvOpAtomicExchange:
6249    case SpvOpAtomicCompareExchange:
6250    case SpvOpAtomicCompareExchangeWeak:
6251    case SpvOpAtomicIIncrement:
6252    case SpvOpAtomicIDecrement:
6253    case SpvOpAtomicIAdd:
6254    case SpvOpAtomicISub:
6255    case SpvOpAtomicSMin:
6256    case SpvOpAtomicUMin:
6257    case SpvOpAtomicSMax:
6258    case SpvOpAtomicUMax:
6259    case SpvOpAtomicAnd:
6260    case SpvOpAtomicOr:
6261    case SpvOpAtomicXor:
6262    case SpvOpAtomicFAddEXT:
6263    case SpvOpAtomicFMinEXT:
6264    case SpvOpAtomicFMaxEXT:
6265    case SpvOpAtomicFlagTestAndSet: {
6266       struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
6267       if (pointer->value_type == vtn_value_type_image_pointer) {
6268          vtn_handle_image(b, opcode, w, count);
6269       } else {
6270          vtn_assert(pointer->value_type == vtn_value_type_pointer);
6271          vtn_handle_atomics(b, opcode, w, count);
6272       }
6273       break;
6274    }
6275 
6276    case SpvOpAtomicStore:
6277    case SpvOpAtomicFlagClear: {
6278       struct vtn_value *pointer = vtn_untyped_value(b, w[1]);
6279       if (pointer->value_type == vtn_value_type_image_pointer) {
6280          vtn_handle_image(b, opcode, w, count);
6281       } else {
6282          vtn_assert(pointer->value_type == vtn_value_type_pointer);
6283          vtn_handle_atomics(b, opcode, w, count);
6284       }
6285       break;
6286    }
6287 
6288    case SpvOpSelect:
6289       vtn_handle_select(b, opcode, w, count);
6290       break;
6291 
6292    case SpvOpSNegate:
6293    case SpvOpFNegate:
6294    case SpvOpNot:
6295    case SpvOpAny:
6296    case SpvOpAll:
6297    case SpvOpConvertFToU:
6298    case SpvOpConvertFToS:
6299    case SpvOpConvertSToF:
6300    case SpvOpConvertUToF:
6301    case SpvOpUConvert:
6302    case SpvOpSConvert:
6303    case SpvOpFConvert:
6304    case SpvOpQuantizeToF16:
6305    case SpvOpSatConvertSToU:
6306    case SpvOpSatConvertUToS:
6307    case SpvOpPtrCastToGeneric:
6308    case SpvOpGenericCastToPtr:
6309    case SpvOpIsNan:
6310    case SpvOpIsInf:
6311    case SpvOpIsFinite:
6312    case SpvOpIsNormal:
6313    case SpvOpSignBitSet:
6314    case SpvOpLessOrGreater:
6315    case SpvOpOrdered:
6316    case SpvOpUnordered:
6317    case SpvOpIAdd:
6318    case SpvOpFAdd:
6319    case SpvOpISub:
6320    case SpvOpFSub:
6321    case SpvOpIMul:
6322    case SpvOpFMul:
6323    case SpvOpUDiv:
6324    case SpvOpSDiv:
6325    case SpvOpFDiv:
6326    case SpvOpUMod:
6327    case SpvOpSRem:
6328    case SpvOpSMod:
6329    case SpvOpFRem:
6330    case SpvOpFMod:
6331    case SpvOpVectorTimesScalar:
6332    case SpvOpDot:
6333    case SpvOpIAddCarry:
6334    case SpvOpISubBorrow:
6335    case SpvOpUMulExtended:
6336    case SpvOpSMulExtended:
6337    case SpvOpShiftRightLogical:
6338    case SpvOpShiftRightArithmetic:
6339    case SpvOpShiftLeftLogical:
6340    case SpvOpLogicalEqual:
6341    case SpvOpLogicalNotEqual:
6342    case SpvOpLogicalOr:
6343    case SpvOpLogicalAnd:
6344    case SpvOpLogicalNot:
6345    case SpvOpBitwiseOr:
6346    case SpvOpBitwiseXor:
6347    case SpvOpBitwiseAnd:
6348    case SpvOpIEqual:
6349    case SpvOpFOrdEqual:
6350    case SpvOpFUnordEqual:
6351    case SpvOpINotEqual:
6352    case SpvOpFOrdNotEqual:
6353    case SpvOpFUnordNotEqual:
6354    case SpvOpULessThan:
6355    case SpvOpSLessThan:
6356    case SpvOpFOrdLessThan:
6357    case SpvOpFUnordLessThan:
6358    case SpvOpUGreaterThan:
6359    case SpvOpSGreaterThan:
6360    case SpvOpFOrdGreaterThan:
6361    case SpvOpFUnordGreaterThan:
6362    case SpvOpULessThanEqual:
6363    case SpvOpSLessThanEqual:
6364    case SpvOpFOrdLessThanEqual:
6365    case SpvOpFUnordLessThanEqual:
6366    case SpvOpUGreaterThanEqual:
6367    case SpvOpSGreaterThanEqual:
6368    case SpvOpFOrdGreaterThanEqual:
6369    case SpvOpFUnordGreaterThanEqual:
6370    case SpvOpDPdx:
6371    case SpvOpDPdy:
6372    case SpvOpFwidth:
6373    case SpvOpDPdxFine:
6374    case SpvOpDPdyFine:
6375    case SpvOpFwidthFine:
6376    case SpvOpDPdxCoarse:
6377    case SpvOpDPdyCoarse:
6378    case SpvOpFwidthCoarse:
6379    case SpvOpBitFieldInsert:
6380    case SpvOpBitFieldSExtract:
6381    case SpvOpBitFieldUExtract:
6382    case SpvOpBitReverse:
6383    case SpvOpBitCount:
6384    case SpvOpTranspose:
6385    case SpvOpOuterProduct:
6386    case SpvOpMatrixTimesScalar:
6387    case SpvOpVectorTimesMatrix:
6388    case SpvOpMatrixTimesVector:
6389    case SpvOpMatrixTimesMatrix:
6390    case SpvOpUCountLeadingZerosINTEL:
6391    case SpvOpUCountTrailingZerosINTEL:
6392    case SpvOpAbsISubINTEL:
6393    case SpvOpAbsUSubINTEL:
6394    case SpvOpIAddSatINTEL:
6395    case SpvOpUAddSatINTEL:
6396    case SpvOpIAverageINTEL:
6397    case SpvOpUAverageINTEL:
6398    case SpvOpIAverageRoundedINTEL:
6399    case SpvOpUAverageRoundedINTEL:
6400    case SpvOpISubSatINTEL:
6401    case SpvOpUSubSatINTEL:
6402    case SpvOpIMul32x16INTEL:
6403    case SpvOpUMul32x16INTEL:
6404       vtn_handle_alu(b, opcode, w, count);
6405       break;
6406 
6407    case SpvOpSDotKHR:
6408    case SpvOpUDotKHR:
6409    case SpvOpSUDotKHR:
6410    case SpvOpSDotAccSatKHR:
6411    case SpvOpUDotAccSatKHR:
6412    case SpvOpSUDotAccSatKHR:
6413       vtn_handle_integer_dot(b, opcode, w, count);
6414       break;
6415 
6416    case SpvOpBitcast:
6417       vtn_handle_bitcast(b, w, count);
6418       break;
6419 
6420    /* TODO: One day, we should probably do something with this information.
6421     * For now, though, it's safe to implement them as no-ops.
6422     * Needed for Rusticl SYCL support.
6423     */
6424    case SpvOpAssumeTrueKHR:
6425       break;
6426 
6427    case SpvOpExpectKHR:
6428    case SpvOpVectorExtractDynamic:
6429    case SpvOpVectorInsertDynamic:
6430    case SpvOpVectorShuffle:
6431    case SpvOpCompositeConstruct:
6432    case SpvOpCompositeConstructReplicateEXT:
6433    case SpvOpCompositeExtract:
6434    case SpvOpCompositeInsert:
6435    case SpvOpCopyLogical:
6436    case SpvOpCopyObject:
6437       vtn_handle_composite(b, opcode, w, count);
6438       break;
6439 
6440    case SpvOpEmitVertex:
6441    case SpvOpEndPrimitive:
6442    case SpvOpEmitStreamVertex:
6443    case SpvOpEndStreamPrimitive:
6444    case SpvOpControlBarrier:
6445    case SpvOpMemoryBarrier:
6446       vtn_handle_barrier(b, opcode, w, count);
6447       break;
6448 
6449    case SpvOpGroupNonUniformElect:
6450    case SpvOpGroupNonUniformAll:
6451    case SpvOpGroupNonUniformAny:
6452    case SpvOpGroupNonUniformAllEqual:
6453    case SpvOpGroupNonUniformBroadcast:
6454    case SpvOpGroupNonUniformBroadcastFirst:
6455    case SpvOpGroupNonUniformBallot:
6456    case SpvOpGroupNonUniformInverseBallot:
6457    case SpvOpGroupNonUniformBallotBitExtract:
6458    case SpvOpGroupNonUniformBallotBitCount:
6459    case SpvOpGroupNonUniformBallotFindLSB:
6460    case SpvOpGroupNonUniformBallotFindMSB:
6461    case SpvOpGroupNonUniformShuffle:
6462    case SpvOpGroupNonUniformShuffleXor:
6463    case SpvOpGroupNonUniformShuffleUp:
6464    case SpvOpGroupNonUniformShuffleDown:
6465    case SpvOpGroupNonUniformIAdd:
6466    case SpvOpGroupNonUniformFAdd:
6467    case SpvOpGroupNonUniformIMul:
6468    case SpvOpGroupNonUniformFMul:
6469    case SpvOpGroupNonUniformSMin:
6470    case SpvOpGroupNonUniformUMin:
6471    case SpvOpGroupNonUniformFMin:
6472    case SpvOpGroupNonUniformSMax:
6473    case SpvOpGroupNonUniformUMax:
6474    case SpvOpGroupNonUniformFMax:
6475    case SpvOpGroupNonUniformBitwiseAnd:
6476    case SpvOpGroupNonUniformBitwiseOr:
6477    case SpvOpGroupNonUniformBitwiseXor:
6478    case SpvOpGroupNonUniformLogicalAnd:
6479    case SpvOpGroupNonUniformLogicalOr:
6480    case SpvOpGroupNonUniformLogicalXor:
6481    case SpvOpGroupNonUniformQuadBroadcast:
6482    case SpvOpGroupNonUniformQuadSwap:
6483    case SpvOpGroupNonUniformQuadAllKHR:
6484    case SpvOpGroupNonUniformQuadAnyKHR:
6485    case SpvOpGroupAll:
6486    case SpvOpGroupAny:
6487    case SpvOpGroupBroadcast:
6488    case SpvOpGroupIAdd:
6489    case SpvOpGroupFAdd:
6490    case SpvOpGroupFMin:
6491    case SpvOpGroupUMin:
6492    case SpvOpGroupSMin:
6493    case SpvOpGroupFMax:
6494    case SpvOpGroupUMax:
6495    case SpvOpGroupSMax:
6496    case SpvOpSubgroupBallotKHR:
6497    case SpvOpSubgroupFirstInvocationKHR:
6498    case SpvOpSubgroupReadInvocationKHR:
6499    case SpvOpSubgroupAllKHR:
6500    case SpvOpSubgroupAnyKHR:
6501    case SpvOpSubgroupAllEqualKHR:
6502    case SpvOpGroupIAddNonUniformAMD:
6503    case SpvOpGroupFAddNonUniformAMD:
6504    case SpvOpGroupFMinNonUniformAMD:
6505    case SpvOpGroupUMinNonUniformAMD:
6506    case SpvOpGroupSMinNonUniformAMD:
6507    case SpvOpGroupFMaxNonUniformAMD:
6508    case SpvOpGroupUMaxNonUniformAMD:
6509    case SpvOpGroupSMaxNonUniformAMD:
6510    case SpvOpSubgroupShuffleINTEL:
6511    case SpvOpSubgroupShuffleDownINTEL:
6512    case SpvOpSubgroupShuffleUpINTEL:
6513    case SpvOpSubgroupShuffleXorINTEL:
6514    case SpvOpGroupNonUniformRotateKHR:
6515       vtn_handle_subgroup(b, opcode, w, count);
6516       break;
6517 
6518    case SpvOpPtrDiff:
6519    case SpvOpPtrEqual:
6520    case SpvOpPtrNotEqual:
6521       vtn_handle_ptr(b, opcode, w, count);
6522       break;
6523 
6524    case SpvOpBeginInvocationInterlockEXT:
6525       nir_begin_invocation_interlock(&b->nb);
6526       break;
6527 
6528    case SpvOpEndInvocationInterlockEXT:
6529       nir_end_invocation_interlock(&b->nb);
6530       break;
6531 
6532    case SpvOpDemoteToHelperInvocation: {
6533       nir_demote(&b->nb);
6534       break;
6535    }
6536 
6537    case SpvOpIsHelperInvocationEXT: {
6538       vtn_push_nir_ssa(b, w[2], nir_is_helper_invocation(&b->nb, 1));
6539       break;
6540    }
6541 
6542    case SpvOpReadClockKHR: {
6543       SpvScope scope = vtn_constant_uint(b, w[3]);
6544       vtn_fail_if(scope != SpvScopeDevice && scope != SpvScopeSubgroup,
6545                   "OpReadClockKHR Scope must be either "
6546                   "ScopeDevice or ScopeSubgroup.");
6547 
6548       /* Operation supports two result types: uvec2 and uint64_t.  The NIR
6549        * intrinsic gives uvec2, so pack the result for the other case.
6550        */
6551       nir_def *result = nir_shader_clock(&b->nb, vtn_translate_scope(b, scope));
6552 
6553       struct vtn_type *type = vtn_get_type(b, w[1]);
6554       const struct glsl_type *dest_type = type->type;
6555 
6556       if (glsl_type_is_vector(dest_type)) {
6557          assert(dest_type == glsl_vector_type(GLSL_TYPE_UINT, 2));
6558       } else {
6559          assert(glsl_type_is_scalar(dest_type));
6560          assert(glsl_get_base_type(dest_type) == GLSL_TYPE_UINT64);
6561          result = nir_pack_64_2x32(&b->nb, result);
6562       }
6563 
6564       vtn_push_nir_ssa(b, w[2], result);
6565       break;
6566    }
6567 
6568    case SpvOpTraceNV:
6569    case SpvOpTraceRayKHR:
6570    case SpvOpReportIntersectionKHR:
6571    case SpvOpIgnoreIntersectionNV:
6572    case SpvOpTerminateRayNV:
6573    case SpvOpExecuteCallableNV:
6574    case SpvOpExecuteCallableKHR:
6575       vtn_handle_ray_intrinsic(b, opcode, w, count);
6576       break;
6577 
6578    case SpvOpRayQueryInitializeKHR:
6579    case SpvOpRayQueryTerminateKHR:
6580    case SpvOpRayQueryGenerateIntersectionKHR:
6581    case SpvOpRayQueryConfirmIntersectionKHR:
6582    case SpvOpRayQueryProceedKHR:
6583    case SpvOpRayQueryGetIntersectionTypeKHR:
6584    case SpvOpRayQueryGetRayTMinKHR:
6585    case SpvOpRayQueryGetRayFlagsKHR:
6586    case SpvOpRayQueryGetIntersectionTKHR:
6587    case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6588    case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6589    case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6590    case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6591    case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6592    case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6593    case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6594    case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6595    case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6596    case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6597    case SpvOpRayQueryGetWorldRayDirectionKHR:
6598    case SpvOpRayQueryGetWorldRayOriginKHR:
6599    case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6600    case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6601    case SpvOpRayQueryGetIntersectionTriangleVertexPositionsKHR:
6602       vtn_handle_ray_query_intrinsic(b, opcode, w, count);
6603       break;
6604 
6605    case SpvOpLifetimeStart:
6606    case SpvOpLifetimeStop:
6607       break;
6608 
6609    case SpvOpGroupAsyncCopy:
6610    case SpvOpGroupWaitEvents:
6611       vtn_handle_opencl_core_instruction(b, opcode, w, count);
6612       break;
6613 
6614    case SpvOpWritePackedPrimitiveIndices4x8NV:
6615       vtn_handle_write_packed_primitive_indices(b, opcode, w, count);
6616       break;
6617 
6618    case SpvOpSetMeshOutputsEXT:
6619       nir_set_vertex_and_primitive_count(
6620          &b->nb, vtn_get_nir_ssa(b, w[1]), vtn_get_nir_ssa(b, w[2]),
6621          nir_undef(&b->nb, 1, 32));
6622       break;
6623 
6624    case SpvOpAllocateNodePayloadsAMDX:
6625       vtn_handle_allocate_node_payloads(b, opcode, w, count);
6626       break;
6627 
6628    case SpvOpFinishWritingNodePayloadAMDX:
6629       break;
6630 
6631    case SpvOpCooperativeMatrixLoadKHR:
6632    case SpvOpCooperativeMatrixStoreKHR:
6633    case SpvOpCooperativeMatrixLengthKHR:
6634    case SpvOpCooperativeMatrixMulAddKHR:
6635       vtn_handle_cooperative_instruction(b, opcode, w, count);
6636       break;
6637 
6638    default:
6639       vtn_fail_with_opcode("Unhandled opcode", opcode);
6640    }
6641 
6642    return true;
6643 }
6644 
6645 static bool
6646 is_glslang(const struct vtn_builder *b)
6647 {
6648    return b->generator_id == vtn_generator_glslang_reference_front_end ||
6649           b->generator_id == vtn_generator_shaderc_over_glslang;
6650 }
6651 
6652 struct vtn_builder*
6653 vtn_create_builder(const uint32_t *words, size_t word_count,
6654                    gl_shader_stage stage, const char *entry_point_name,
6655                    const struct spirv_to_nir_options *options)
6656 {
6657    /* Initialize the vtn_builder object */
6658    struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
6659 
6660    b->spirv = words;
6661    b->spirv_word_count = word_count;
6662    b->file = NULL;
6663    b->line = -1;
6664    b->col = -1;
6665    list_inithead(&b->functions);
6666    b->entry_point_stage = stage;
6667    b->entry_point_name = entry_point_name;
6668 
6669    /*
6670     * Handle the SPIR-V header (first 5 dwords).
6671     * Can't use vtn_assert() as the setjmp(3) target isn't initialized yet.
6672     */
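   /* Header layout: word 0 = magic, 1 = version, 2 = generator, 3 = ID bound,
    * 4 = schema (must be 0).
    */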
6673    if (word_count <= 5)
6674       goto fail;
6675 
6676    if (words[0] != SpvMagicNumber) {
6677       vtn_err("words[0] was 0x%x, want 0x%x", words[0], SpvMagicNumber);
6678       goto fail;
6679    }
6680 
6681    b->version = words[1];
6682    if (b->version < 0x10000) {
6683       vtn_err("version was 0x%x, want >= 0x10000", b->version);
6684       goto fail;
6685    }
6686 
6687    b->generator_id = words[2] >> 16;
6688    uint16_t generator_version = words[2];
6689 
6690    unsigned value_id_bound = words[3];
6691    if (words[4] != 0) {
6692       vtn_err("words[4] was %u, want 0", words[4]);
6693       goto fail;
6694    }
6695 
6696    b->value_id_bound = value_id_bound;
6697 
6698    /* Allocate all the data that can be dropped after parsing using
6699     * a cheaper allocation strategy.  Use the value_id_bound and the
6700     * size of the common internal structs to approximate a good
6701     * buffer_size.
6702     */
6703    const linear_opts lin_opts = {
6704       .min_buffer_size = 2 * value_id_bound * (sizeof(struct vtn_value) +
6705                                                sizeof(struct vtn_ssa_value)),
6706    };
6707    b->lin_ctx = linear_context_with_opts(b, &lin_opts);
6708 
6709    struct spirv_to_nir_options *dup_options =
6710       vtn_alloc(b, struct spirv_to_nir_options);
6711    *dup_options = *options;
6712 
6713    b->options = dup_options;
6714    b->values = vtn_zalloc_array(b, struct vtn_value, value_id_bound);
6715 
6716    if (b->options->capabilities != NULL)
6717       b->supported_capabilities = *b->options->capabilities;
6718    else
6719       b->supported_capabilities = implemented_capabilities;
6720 
6721    spirv_capabilities_set(&b->supported_capabilities, SpvCapabilityLinkage,
6722                           b->options->create_library);
6723 
6724    /* In GLSLang commit 8297936dd6eb3, their handling of barrier() was fixed
6725     * to provide correct memory semantics on compute shader barrier()
6726     * commands.  Prior to that, we need to fix them up ourselves.  This
6727     * GLSLang fix caused them to bump to generator version 3.
6728     */
6729    b->wa_glslang_cs_barrier = is_glslang(b) && generator_version < 3;
6730 
6731    /* Identifying the LLVM-SPIRV translator:
6732     *
6733     * The LLVM-SPIRV translator currently doesn't store any generator ID [1].
6734     * Our use case involving the SPIRV-Tools linker also means we want to check
6735     * for that tool instead. Finally, the SPIRV-Tools linker also stores its
6736     * generator ID in the wrong location [2].
6737     *
6738     * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/1223
6739     * [2] : https://github.com/KhronosGroup/SPIRV-Tools/pull/4549
6740     */
6741    const bool is_llvm_spirv_translator =
6742       (b->generator_id == 0 &&
6743        generator_version == vtn_generator_spirv_tools_linker) ||
6744       b->generator_id == vtn_generator_spirv_tools_linker;
6745 
6746    /* The LLVM-SPIRV translator generates Undef initializers for _local
6747     * variables [1].
6748     *
6749     * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1224
6750     */
6751    b->wa_llvm_spirv_ignore_workgroup_initializer =
6752       b->options->environment == NIR_SPIRV_OPENCL && is_llvm_spirv_translator;
6753 
6754    /* Older versions of GLSLang would incorrectly emit OpReturn after
6755     * OpEmitMeshTasksEXT. This is incorrect since the latter is already
6756     * a terminator instruction.
6757     *
6758     * See https://github.com/KhronosGroup/glslang/issues/3020 for details.
6759     *
6760     * Clay Shader Compiler (used by GravityMark) is also affected.
6761     */
6762    b->wa_ignore_return_after_emit_mesh_tasks =
6763       (is_glslang(b) && generator_version < 11) ||
6764       (b->generator_id == vtn_generator_clay_shader_compiler &&
6765        generator_version < 18);
6766 
6767    if (b->options->environment == NIR_SPIRV_VULKAN && b->version < 0x10400)
6768       b->vars_used_indirectly = _mesa_pointer_set_create(b);
6769 
6770    if (b->options->debug_info)
6771       b->strings = _mesa_pointer_hash_table_create(b);
6772 
6773    return b;
6774  fail:
6775    ralloc_free(b);
6776    return NULL;
6777 }
6778 
6779 /* See glsl_type_add_to_function_params and vtn_ssa_value_add_to_call_params */
6780 static void
6781 vtn_emit_kernel_entry_point_wrapper_struct_param(struct nir_builder *b,
6782                                                  nir_deref_instr *deref,
6783                                                  nir_call_instr *call,
6784                                                  unsigned *idx)
6785 {
6786    if (glsl_type_is_vector_or_scalar(deref->type)) {
6787       call->params[(*idx)++] = nir_src_for_ssa(nir_load_deref(b, deref));
6788    } else {
6789       unsigned elems = glsl_get_length(deref->type);
6790       for (unsigned i = 0; i < elems; i++) {
6791          nir_deref_instr *child_deref = glsl_type_is_struct(deref->type)
6792             ? nir_build_deref_struct(b, deref, i)
6793             : nir_build_deref_array_imm(b, deref, i);
6794          vtn_emit_kernel_entry_point_wrapper_struct_param(b, child_deref, call,
6795                                                           idx);
6796       }
6797    }
6798 }
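
/* Illustrative example of the flattening done by the helper above, for a
 * hypothetical kernel parameter (not taken from any particular module):
 *
 *    struct S { float4 a; int b; };   // one SPIR-V struct parameter
 *
 * turns into one call parameter per leaf vector/scalar, roughly
 *
 *    call->params[idx++] = nir_load_deref(b, &s.a);   // vec4
 *    call->params[idx++] = nir_load_deref(b, &s.b);   // int
 *
 * mirroring the decomposition glsl_type_add_to_function_params does on the
 * callee side.
 */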
6799 
6800 static nir_function *
6801 vtn_emit_kernel_entry_point_wrapper(struct vtn_builder *b,
6802                                     nir_function *entry_point)
6803 {
6804    vtn_assert(entry_point == b->entry_point->func->nir_func);
6805    vtn_fail_if(!entry_point->name, "entry points are required to have a name");
6806    const char *func_name =
6807       ralloc_asprintf(b->shader, "__wrapped_%s", entry_point->name);
6808 
6809    vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
6810 
6811    nir_function *main_entry_point = nir_function_create(b->shader, func_name);
6812    nir_function_impl *impl = nir_function_impl_create(main_entry_point);
6813    b->nb = nir_builder_at(nir_after_impl(impl));
6814    b->func_param_idx = 0;
6815 
6816    nir_call_instr *call = nir_call_instr_create(b->nb.shader, entry_point);
6817 
6818    unsigned call_idx = 0;
6819    for (unsigned i = 0; i < b->entry_point->func->type->length; ++i) {
6820       struct vtn_type *param_type = b->entry_point->func->type->params[i];
6821 
6822       b->shader->info.cs.has_variable_shared_mem |=
6823          param_type->storage_class == SpvStorageClassWorkgroup;
6824 
6825       /* consider all pointers to function memory to be parameters passed
6826        * by value
6827        */
6828       bool is_by_val = param_type->base_type == vtn_base_type_pointer &&
6829          param_type->storage_class == SpvStorageClassFunction;
6830 
6831       /* input variable */
6832       nir_variable *in_var = rzalloc(b->nb.shader, nir_variable);
6833 
6834       if (is_by_val) {
6835          in_var->data.mode = nir_var_uniform;
6836          in_var->type = param_type->pointed->type;
6837       } else if (param_type->base_type == vtn_base_type_image) {
6838          in_var->data.mode = nir_var_image;
6839          in_var->type = param_type->glsl_image;
6840          in_var->data.access =
6841             spirv_to_gl_access_qualifier(b, param_type->access_qualifier);
6842       } else if (param_type->base_type == vtn_base_type_sampler) {
6843          in_var->data.mode = nir_var_uniform;
6844          in_var->type = glsl_bare_sampler_type();
6845       } else {
6846          in_var->data.mode = nir_var_uniform;
6847          in_var->type = param_type->type;
6848       }
6849 
6850       in_var->data.read_only = true;
6851       in_var->data.location = i;
6852 
6853       nir_shader_add_variable(b->nb.shader, in_var);
6854 
6855       /* we have to copy the entire variable into function memory */
6856       if (is_by_val) {
6857          nir_variable *copy_var =
6858             nir_local_variable_create(impl, in_var->type, "copy_in");
6859          nir_copy_var(&b->nb, copy_var, in_var);
6860          call->params[call_idx++] =
6861             nir_src_for_ssa(&nir_build_deref_var(&b->nb, copy_var)->def);
6862       } else if (param_type->base_type == vtn_base_type_image ||
6863                  param_type->base_type == vtn_base_type_sampler) {
6864          /* Don't load the var, just pass a deref of it */
6865          call->params[call_idx++] =
6866             nir_src_for_ssa(&nir_build_deref_var(&b->nb, in_var)->def);
6867       } else if (param_type->base_type == vtn_base_type_struct) {
6868          /* We decompose struct and array parameters in vtn, so we need to
6869           * handle them here explicitly.
6870           * We have to keep the arguments on the actual entry point intact,
6871           * because the runtimes rely on them matching the SPIR-V.
6872           */
6873          nir_deref_instr *deref = nir_build_deref_var(&b->nb, in_var);
6874          vtn_emit_kernel_entry_point_wrapper_struct_param(&b->nb, deref, call,
6875                                                           &call_idx);
6876       } else {
6877          call->params[call_idx++] =
6878             nir_src_for_ssa(nir_load_var(&b->nb, in_var));
6879       }
6880    }
6881 
6882    assert(call_idx == entry_point->num_params);
6883 
6884    nir_builder_instr_insert(&b->nb, &call->instr);
6885 
6886    return main_entry_point;
6887 }
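
/* Rough shape of the wrapper built above for a hypothetical kernel
 * "kernel void foo(global int *p, struct S s)" (illustrative, not literal
 * NIR):
 *
 *    void __wrapped_foo() {
 *       // in@0, in@1 are the read-only uniform variables created per
 *       // parameter; the runtime binds the actual kernel arguments to them.
 *       call foo(load_var(in@0),       // pointer argument loaded by value
 *                load_deref(&in@1.a),  // struct argument decomposed per leaf
 *                load_deref(&in@1.b));
 *    }
 */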
6888 
6889 static bool
6890 can_remove(nir_variable *var, void *data)
6891 {
6892    const struct set *vars_used_indirectly = data;
6893    return !_mesa_set_search(vars_used_indirectly, var);
6894 }
6895 
6896 nir_shader *
6897 spirv_to_nir(const uint32_t *words, size_t word_count,
6898              struct nir_spirv_specialization *spec, unsigned num_spec,
6899              gl_shader_stage stage, const char *entry_point_name,
6900              const struct spirv_to_nir_options *options,
6901              const nir_shader_compiler_options *nir_options)
6902 
6903 {
6904    mesa_spirv_debug_init();
6905 
6906    if (MESA_SPIRV_DEBUG(ASM))
6907       spirv_print_asm(stderr, words, word_count);
6908 
6909    const uint32_t *word_end = words + word_count;
6910 
6911    struct vtn_builder *b = vtn_create_builder(words, word_count,
6912                                               stage, entry_point_name,
6913                                               options);
6914 
6915    if (b == NULL)
6916       return NULL;
6917 
6918    /* See also _vtn_fail() */
6919    if (vtn_setjmp(b->fail_jump)) {
6920       ralloc_free(b);
6921       return NULL;
6922    }
6923 
6924    const char *dump_path = secure_getenv("MESA_SPIRV_DUMP_PATH");
6925    if (dump_path)
6926       vtn_dump_shader(b, dump_path, "spirv");
6927 
6928    b->shader = nir_shader_create(b, stage, nir_options, NULL);
6929    b->shader->info.subgroup_size = options->subgroup_size;
6930    b->shader->info.float_controls_execution_mode = options->float_controls_execution_mode;
6931    b->shader->info.cs.shader_index = options->shader_index;
6932    _mesa_blake3_compute(words, word_count * sizeof(uint32_t), b->shader->info.source_blake3);
6933 
6934    /* Skip the SPIR-V header, already handled in vtn_create_builder */
6935    words += 5;
6936 
6937    /* Handle all the preamble instructions */
6938    words = vtn_foreach_instruction(b, words, word_end,
6939                                    vtn_handle_preamble_instruction);
6940 
6941    if (b->shader->info.subgroup_size == SUBGROUP_SIZE_UNIFORM &&
6942        b->enabled_capabilities.GroupNonUniform)
6943       b->shader->info.subgroup_size = SUBGROUP_SIZE_API_CONSTANT;
6944 
6945    /* DirectXShaderCompiler and glslang/shaderc both create OpKill from HLSL's
6946     * discard/clip, which uses demote semantics. DirectXShaderCompiler will use
6947     * demote if the extension is enabled, so we disable this workaround in that
6948     * case.
6949     *
6950     * Related glslang issue: https://github.com/KhronosGroup/glslang/issues/2416
6951     */
6952    bool dxsc = b->generator_id == vtn_generator_spiregg;
6953    b->convert_discard_to_demote = (nir_options->discard_is_demote ||
6954                                    (dxsc && !b->enabled_capabilities.DemoteToHelperInvocation) ||
6955                                    (is_glslang(b) && b->source_lang == SpvSourceLanguageHLSL)) &&
6956                                   b->supported_capabilities.DemoteToHelperInvocation;
6957 
6958    if (!options->create_library && b->entry_point == NULL) {
6959       vtn_fail("Entry point not found for %s shader \"%s\"",
6960                _mesa_shader_stage_to_string(stage), entry_point_name);
6961       ralloc_free(b);
6962       return NULL;
6963    }
6964 
6965    /* Ensure a sane address mode is being used for function temps */
6966    assert(nir_address_format_bit_size(b->options->temp_addr_format) == nir_get_ptr_bitsize(b->shader));
6967    assert(nir_address_format_num_components(b->options->temp_addr_format) == 1);
6968 
6969    /* Set shader info defaults */
6970    if (stage == MESA_SHADER_GEOMETRY)
6971       b->shader->info.gs.invocations = 1;
6972 
6973    /* Parse execution modes. */
6974    if (!options->create_library)
6975       vtn_foreach_execution_mode(b, b->entry_point,
6976                                  vtn_handle_execution_mode, NULL);
6977 
6978    b->specializations = spec;
6979    b->num_specializations = num_spec;
6980 
6981    /* Handle all variable, type, and constant instructions */
6982    words = vtn_foreach_instruction(b, words, word_end,
6983                                    vtn_handle_variable_or_type_instruction);
6984 
6985    /* Parse execution modes that depend on IDs. Must happen after we have
6986     * constants parsed.
6987     */
6988    if (!options->create_library)
6989       vtn_foreach_execution_mode(b, b->entry_point,
6990                                  vtn_handle_execution_mode_id, NULL);
6991 
6992    if (b->workgroup_size_builtin) {
6993       vtn_assert(gl_shader_stage_uses_workgroup(stage));
6994       vtn_assert(b->workgroup_size_builtin->type->type ==
6995                  glsl_vector_type(GLSL_TYPE_UINT, 3));
6996 
6997       nir_const_value *const_size =
6998          b->workgroup_size_builtin->constant->values;
6999 
7000       b->shader->info.workgroup_size[0] = const_size[0].u32;
7001       b->shader->info.workgroup_size[1] = const_size[1].u32;
7002       b->shader->info.workgroup_size[2] = const_size[2].u32;
7003    }
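   /* For example (illustrative SPIR-V), the block above picks up a module's
    * WorkgroupSize built-in constant such as
    *
    *    OpDecorate %wgsize BuiltIn WorkgroupSize
    *    %v3uint = OpTypeVector %uint 3
    *    %wgsize = OpConstantComposite %v3uint %uint_8 %uint_8 %uint_1
    *
    * and copies the three components into shader->info.workgroup_size.
    */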
7004 
7005    /* Set types on all vtn_values */
7006    vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);
7007 
7008    vtn_build_cfg(b, words, word_end);
7009 
7010    if (!options->create_library) {
7011       assert(b->entry_point->value_type == vtn_value_type_function);
7012       b->entry_point->func->referenced = true;
7013    }
7014 
7015    bool progress;
7016    do {
7017       progress = false;
7018       vtn_foreach_function(func, &b->functions) {
7019          if ((options->create_library || func->referenced) && !func->emitted) {
7020             _mesa_hash_table_clear(b->strings, NULL);
7021             vtn_function_emit(b, func, vtn_handle_body_instruction);
7022             progress = true;
7023          }
7024       }
7025    } while (progress);
7026 
7027    if (!options->create_library) {
7028       vtn_assert(b->entry_point->value_type == vtn_value_type_function);
7029       nir_function *entry_point = b->entry_point->func->nir_func;
7030       vtn_assert(entry_point);
7031 
7032       entry_point->dont_inline = false;
7033       /* post process entry_points with input params */
7034       if (entry_point->num_params && b->shader->info.stage == MESA_SHADER_KERNEL)
7035          entry_point = vtn_emit_kernel_entry_point_wrapper(b, entry_point);
7036 
7037       entry_point->is_entrypoint = true;
7038    }
7039 
7040    if (MESA_SPIRV_DEBUG(VALUES)) {
7041       vtn_dump_values(b, stdout);
7042    }
7043 
7044    /* structurize the CFG */
7045    nir_lower_goto_ifs(b->shader);
7046 
7047    nir_validate_shader(b->shader, "after spirv cfg");
7048 
7049    nir_lower_continue_constructs(b->shader);
7050 
7051    /* A SPIR-V module can have multiple shader stages and also multiple
7052     * shaders of the same stage.  Global variables are declared per-module.
7053     *
7054     * Starting in SPIR-V 1.4 the list of global variables is part of
7055     * OpEntryPoint, so only valid ones will be created.  Previous versions
7056     * only have Input and Output variables listed, so remove dead variables to
7057     * clean up the remaining ones.
7058     */
7059    if (!options->create_library && b->version < 0x10400) {
7060       const nir_remove_dead_variables_options dead_opts = {
7061          .can_remove_var = can_remove,
7062          .can_remove_var_data = b->vars_used_indirectly,
7063       };
7064       nir_remove_dead_variables(b->shader, ~(nir_var_function_temp |
7065                                              nir_var_shader_out |
7066                                              nir_var_shader_in |
7067                                              nir_var_system_value),
7068                                 b->vars_used_indirectly ? &dead_opts : NULL);
7069    }
7070 
7071    nir_foreach_variable_in_shader(var, b->shader) {
7072       switch (var->data.mode) {
7073       case nir_var_mem_ubo:
7074          b->shader->info.num_ubos++;
7075          break;
7076       case nir_var_mem_ssbo:
7077          b->shader->info.num_ssbos++;
7078          break;
7079       case nir_var_mem_push_const:
7080          vtn_assert(b->shader->num_uniforms == 0);
7081          b->shader->num_uniforms =
7082             glsl_get_explicit_size(glsl_without_array(var->type), false);
7083          break;
7084       }
7085    }
7086 
7087    /* We sometimes generate bogus derefs that, while never used, give the
7088     * validator a bit of heartburn.  Run dead code to get rid of them.
7089     */
7090    nir_opt_dce(b->shader);
7091 
7092    /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
7093     * a Block, all of them will be and Blocks are explicitly laid out.
7094     */
7095    nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
7096       if (glsl_type_is_interface(var->type)) {
7097          assert(b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR);
7098          b->shader->info.shared_memory_explicit_layout = true;
7099          break;
7100       }
7101    }
7102    if (b->shader->info.shared_memory_explicit_layout) {
7103       unsigned size = 0;
7104       nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
7105          assert(glsl_type_is_interface(var->type));
7106          const bool align_to_stride = false;
7107          size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
7108       }
7109       b->shader->info.shared_size = size;
7110    }
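   /* For example (illustrative SPIR-V), an explicitly laid out Workgroup
    * block looks like
    *
    *    OpDecorate %S Block
    *    OpMemberDecorate %S 0 Offset 0
    *    %ptr = OpTypePointer Workgroup %S
    *    %var = OpVariable %ptr Workgroup
    *
    * The extension requires multiple such blocks to be decorated Aliased, so
    * they overlap the same storage and the total shared size is the maximum
    * of the explicit sizes rather than their sum.
    */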
7111 
7112    if (stage == MESA_SHADER_FRAGMENT) {
7113       /* From the Vulkan 1.2.199 spec:
7114        *
7115        *    "If a fragment shader entry point’s interface includes an input
7116        *    variable decorated with SamplePosition, Sample Shading is
7117        *    considered enabled with a minSampleShading value of 1.0."
7118        *
7119        * Similar text exists for SampleId.  Regarding the Sample decoration,
7120        * the Vulkan 1.2.199 spec says:
7121        *
7122        *    "If a fragment shader input is decorated with Sample, a separate
7123        *    value must be assigned to that variable for each covered sample in
7124        *    the fragment, and that value must be sampled at the location of
7125        *    the individual sample. When rasterizationSamples is
7126        *    VK_SAMPLE_COUNT_1_BIT, the fragment center must be used for
7127        *    Centroid, Sample, and undecorated attribute interpolation."
7128        *
7129        * Unfortunately, this isn't quite as clear about static use and the
7130        * interface, but the static use check should be valid.
7131        *
7132        * For OpenGL, similar language exists but it's all more wishy-washy.
7133        * We'll assume the same behavior across APIs.
7134        */
7135       nir_foreach_variable_with_modes(var, b->shader,
7136                                       nir_var_shader_in |
7137                                       nir_var_system_value) {
7138          struct nir_variable_data *members =
7139             var->members ? var->members : &var->data;
7140          uint16_t num_members = var->members ? var->num_members : 1;
7141          for (uint16_t i = 0; i < num_members; i++) {
7142             if (members[i].mode == nir_var_system_value &&
7143                 (members[i].location == SYSTEM_VALUE_SAMPLE_ID ||
7144                  members[i].location == SYSTEM_VALUE_SAMPLE_POS))
7145                b->shader->info.fs.uses_sample_shading = true;
7146 
7147             if (members[i].mode == nir_var_shader_in && members[i].sample)
7148                b->shader->info.fs.uses_sample_shading = true;
7149          }
7150       }
7151    }
7152 
7153    /* Work around applications that declare shader_call_data variables inside
7154     * ray generation shaders or multiple shader_call_data variables in callable
7155     * shaders.
7156     *
7157     * https://gitlab.freedesktop.org/mesa/mesa/-/issues/5326
7158     * https://gitlab.freedesktop.org/mesa/mesa/-/issues/11585
7159     */
7160    if (gl_shader_stage_is_rt(b->shader->info.stage))
7161       NIR_PASS(_, b->shader, nir_remove_dead_variables, nir_var_shader_call_data,
7162                NULL);
7163 
7164    /* Unparent the shader from the vtn_builder before we delete the builder */
7165    ralloc_steal(NULL, b->shader);
7166 
7167    nir_shader *shader = b->shader;
7168    ralloc_free(b);
7169 
7170    return shader;
7171 }
7172 
7173 static void
7174 print_func_param(FILE *fp, nir_function *func, unsigned p)
7175 {
7176    if (func->params[p].name) {
7177       fputs(func->params[p].name, fp);
7178    } else {
7179       fprintf(fp, "arg%u", p);
7180    }
7181 }
7182 
7183 static bool
7184 func_to_nir_builder(FILE *fp, struct vtn_function *func)
7185 {
7186    nir_function *nir_func = func->nir_func;
7187    struct vtn_type *return_type = func->type->return_type;
7188    bool returns = return_type->base_type != vtn_base_type_void;
7189 
7190    if (returns && return_type->base_type != vtn_base_type_scalar &&
7191                   return_type->base_type != vtn_base_type_vector) {
7192       fprintf(stderr, "Unsupported return type for %s", nir_func->name);
7193       return false;
7194    }
7195 
7196    /* If there is a return type, the first NIR parameter is the return deref,
7197     * so offset by that for logical parameter iteration.
7198     */
7199    unsigned first_param = returns ? 1 : 0;
7200 
7201    /* Generate function signature */
7202    fprintf(fp, "static inline %s\n", returns ? "nir_def *": "void");
7203    fprintf(fp, "%s(nir_builder *b", nir_func->name);
7204 
7205    for (unsigned i = first_param; i < nir_func->num_params; ++i) {
7206       fprintf(fp, ", nir_def *");
7207       print_func_param(fp, nir_func, i);
7208    }
7209 
7210    fprintf(fp, ")\n{\n");
7211 
7212    /* Validate inputs. nir_validate will do this too, but the
7213     * errors/backtraces from these asserts should be nicer.
7214     */
7215    for (unsigned i = first_param; i < nir_func->num_params; ++i) {
7216       nir_parameter *param = &nir_func->params[i];
7217 
7218       fprintf(fp, "   assert(");
7219       print_func_param(fp, nir_func, i);
7220       fprintf(fp, "->bit_size == %u);\n", param->bit_size);
7221 
7222       fprintf(fp, "   assert(");
7223       print_func_param(fp, nir_func, i);
7224       fprintf(fp, "->num_components == %u);\n", param->num_components);
7225    }
7226 
7227    fprintf(fp, "\n");
7228 
7229    /* Find the function to call. If not found, create a prototype */
7230    fprintf(fp, "   nir_function *func = nir_shader_get_function_for_name(b->shader, \"%s\");\n",
7231            nir_func->name);
7232    fprintf(fp, "\n");
7233    fprintf(fp, "   if (!func) {\n");
7234    fprintf(fp, "      func = nir_function_create(b->shader, \"%s\");\n",
7235            nir_func->name);
7236    fprintf(fp, "      func->num_params = %u;\n", nir_func->num_params);
7237    fprintf(fp, "      func->params = rzalloc_array(b->shader, nir_parameter, func->num_params);\n");
7238 
7239    for (unsigned i = 0; i < nir_func->num_params; ++i) {
7240       nir_parameter param = nir_func->params[i];
7241 
7242       fprintf(fp, "\n");
7243       fprintf(fp, "      func->params[%u].bit_size = %u;\n", i, param.bit_size);
7244       fprintf(fp, "      func->params[%u].num_components = %u;\n", i,
7245               param.num_components);
7246 
7247       if (returns && i == 0) {
7248          fprintf(fp, "      func->params[%u].is_return = true;\n", i);
7249       }
7250 
7251       if (param.name) {
7252          fprintf(fp, "      func->params[%u].name = \"%s\";\n", i, param.name);
7253       }
7254    }
7255 
7256    fprintf(fp, "   }\n\n");
7257 
7258 
7259    if (returns) {
7260       /* We assume that vec3 variables are lowered to vec4. Mirror that here so
7261        * we don't need to lower vec3 to vec4 again at link-time.
7262        */
7263       assert(glsl_type_is_vector_or_scalar(return_type->type));
7264       unsigned elements = return_type->type->vector_elements;
7265       if (elements == 3)
7266          elements = 4;
7267 
7268       /* Reconstruct the return type. */
7269       fprintf(fp, "   const struct glsl_type *ret_type = glsl_vector_type(%u, %u);\n",
7270               return_type->type->base_type, elements);
7271 
7272       /* With the type, we can make a variable and get a deref to pass in */
7273       fprintf(fp, "   nir_variable *ret = nir_local_variable_create(b->impl, ret_type, \"return\");\n");
7274       fprintf(fp, "   nir_deref_instr *deref = nir_build_deref_var(b, ret);\n");
7275 
7276       /* XXX: This is a hack due to ptr size differing between KERNEL and other
7277        * shader stages. This needs to be fixed in core NIR.
7278        */
7279       fprintf(fp, "   deref->def.bit_size = %u;\n", nir_func->params[0].bit_size);
7280       fprintf(fp, "\n");
7281    }
7282 
7283    /* Call the function */
7284    fprintf(fp, "   nir_call(b, func");
7285 
7286    if (returns)
7287       fprintf(fp, ", &deref->def");
7288 
7289    for (unsigned i = first_param; i < nir_func->num_params; ++i) {
7290       fprintf(fp, ", ");
7291       print_func_param(fp, nir_func, i);
7292    }
7293 
7294    fprintf(fp, ");\n");
7295 
7296    /* Load the return value if any, undoing the vec3->vec4 lowering. */
7297    if (returns) {
7298       fprintf(fp, "\n");
7299 
7300       if (return_type->type->vector_elements == 3)
7301          fprintf(fp, "   return nir_trim_vector(b, nir_load_deref(b, deref), 3);\n");
7302       else
7303          fprintf(fp, "   return nir_load_deref(b, deref);\n");
7304    }
7305 
7306    fprintf(fp, "}\n\n");
7307    return true;
7308 }
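
/* Example of the kind of helper emitted by func_to_nir_builder for a
 * hypothetical exported function "float2 my_func(float2 x)" (illustrative
 * output, not verbatim):
 *
 *    static inline nir_def *
 *    my_func(nir_builder *b, nir_def *x)
 *    {
 *       assert(x->bit_size == 32);
 *       assert(x->num_components == 2);
 *
 *       nir_function *func =
 *          nir_shader_get_function_for_name(b->shader, "my_func");
 *       if (!func) {
 *          ...create the prototype as above...
 *       }
 *
 *       const struct glsl_type *ret_type = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
 *       nir_variable *ret = nir_local_variable_create(b->impl, ret_type, "return");
 *       nir_deref_instr *deref = nir_build_deref_var(b, ret);
 *       nir_call(b, func, &deref->def, x);
 *       return nir_load_deref(b, deref);
 *    }
 */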
7309 
7310 bool
7311 spirv_library_to_nir_builder(FILE *fp, const uint32_t *words, size_t word_count,
7312                              const struct spirv_to_nir_options *options)
7313 {
7314 #ifndef NDEBUG
7315    mesa_spirv_debug_init();
7316 #endif
7317 
7318    const uint32_t *word_end = words + word_count;
7319 
7320    struct vtn_builder *b = vtn_create_builder(words, word_count,
7321                                               MESA_SHADER_KERNEL, "placeholder name",
7322                                               options);
7323 
7324    if (b == NULL)
7325       return false;
7326 
7327    /* See also _vtn_fail() */
7328    if (vtn_setjmp(b->fail_jump)) {
7329       ralloc_free(b);
7330       return false;
7331    }
7332 
7333    b->shader = nir_shader_create(b, MESA_SHADER_KERNEL,
7334                                  &(const nir_shader_compiler_options){0}, NULL);
7335 
7336    /* Skip the SPIR-V header, already handled in vtn_create_builder */
7337    words += 5;
7338 
7339    /* Handle all the preamble instructions */
7340    words = vtn_foreach_instruction(b, words, word_end,
7341                                    vtn_handle_preamble_instruction);
7342 
7343    /* Handle all variable, type, and constant instructions */
7344    words = vtn_foreach_instruction(b, words, word_end,
7345                                    vtn_handle_variable_or_type_instruction);
7346 
7347    /* Set types on all vtn_values */
7348    vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);
7349 
7350    vtn_build_cfg(b, words, word_end);
7351 
7352    fprintf(fp, "#include \"compiler/nir/nir_builder.h\"\n\n");
7353 
7354    nir_fixup_is_exported(b->shader);
7355 
7356    vtn_foreach_function(func, &b->functions) {
7357       if (!func->nir_func->is_exported || func->nir_func->is_entrypoint)
7358          continue;
7359 
7360       if (!func_to_nir_builder(fp, func))
7361          return false;
7362    }
7363 
7364    ralloc_free(b);
7365    return true;
7366 }
7367 
7368 static unsigned
7369 vtn_id_for_type(struct vtn_builder *b, struct vtn_type *type)
7370 {
7371    for (unsigned i = 0; i < b->value_id_bound; i++) {
7372       struct vtn_value *v = &b->values[i];
7373       if (v->value_type == vtn_value_type_type &&
7374           v->type == type)
7375          return i;
7376    }
7377 
7378    return 0;
7379 }
7380 
7381 void
7382 vtn_print_value(struct vtn_builder *b, struct vtn_value *val, FILE *f)
7383 {
7384    fprintf(f, "%s", vtn_value_type_to_string(val->value_type));
7385    switch (val->value_type) {
7386    case vtn_value_type_ssa: {
7387       struct vtn_ssa_value *ssa = val->ssa;
7388       fprintf(f,  " glsl_type=%s", glsl_get_type_name(ssa->type));
7389       break;
7390    }
7391 
7392    case vtn_value_type_constant: {
7393       fprintf(f, " type=%u", vtn_id_for_type(b, val->type));
7394       if (val->is_null_constant)
7395          fprintf(f, " null");
7396       else if (val->is_undef_constant)
7397          fprintf(f, " undef");
7398       break;
7399    }
7400 
7401    case vtn_value_type_pointer: {
7402       struct vtn_pointer *pointer = val->pointer;
7403       fprintf(f, " ptr_type=%u", vtn_id_for_type(b, pointer->type));
7404       fprintf(f, " (pointed-)type=%u", vtn_id_for_type(b, val->pointer->type->pointed));
7405 
7406       if (pointer->deref) {
7407          fprintf(f, "\n           NIR: ");
7408          nir_print_instr(&pointer->deref->instr, f);
7409       }
7410       break;
7411    }
7412 
7413    case vtn_value_type_type: {
7414       struct vtn_type *type = val->type;
7415       fprintf(f, " %s", vtn_base_type_to_string(type->base_type));
7416       switch (type->base_type) {
7417       case vtn_base_type_pointer:
7418          fprintf(f, " deref=%u", vtn_id_for_type(b, type->pointed));
7419          fprintf(f, " %s", spirv_storageclass_to_string(val->type->storage_class));
7420          break;
7421       default:
7422          break;
7423       }
7424       if (type->type)
7425          fprintf(f, " glsl_type=%s", glsl_get_type_name(type->type));
7426       break;
7427    }
7428 
7429    default:
7430       break;
7431    }
7432    fprintf(f, "\n");
7433 }
7434 
7435 void
7436 vtn_dump_values(struct vtn_builder *b, FILE *f)
7437 {
7438    fprintf(f, "=== SPIR-V values\n");
7439    for (unsigned i = 1; i < b->value_id_bound; i++) {
7440       struct vtn_value *val = &b->values[i];
7441       fprintf(f, "%8d = ", i);
7442       vtn_print_value(b, val, f);
7443    }
7444    fprintf(f, "===\n");
7445 }
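
/* When the "values" debug flag is set in MESA_SPIRV_DEBUG, the dump produced
 * above looks roughly like this (illustrative; the exact strings come from
 * vtn_value_type_to_string and friends):
 *
 *    === SPIR-V values
 *           1 = type scalar glsl_type=float
 *           2 = type pointer deref=1 Function glsl_type=float
 *           3 = constant type=1
 *    ===
 */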
7446