1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Faith Ekstrand (faith@gfxstrand.net)
25 *
26 */
27
28 #include "glsl_types.h"
29 #include "vtn_private.h"
30 #include "nir/nir_vla.h"
31 #include "nir/nir_control_flow.h"
32 #include "nir/nir_constant_expressions.h"
33 #include "nir/nir_deref.h"
34 #include "spirv_info.h"
35
36 #include "util/format/u_format.h"
37 #include "util/u_math.h"
38 #include "util/u_string.h"
39 #include "util/u_debug.h"
40 #include "util/mesa-blake3.h"
41
42 #include <stdio.h>
43
44 /* Table of all implemented capabilities. These are the capabilities that are
45 * implemented in spirv_to_nir, not what the device supports.
46 *
47 * This list should remain alphabetized. For the purposes of alphabetization,
48 * suffixes do not exist and 8 comes before 16.
49 */
50 static const struct spirv_capabilities implemented_capabilities = {
51 .Addresses = true,
52 .AtomicFloat16AddEXT = true,
53 .AtomicFloat32AddEXT = true,
54 .AtomicFloat64AddEXT = true,
55 .AtomicFloat16MinMaxEXT = true,
56 .AtomicFloat32MinMaxEXT = true,
57 .AtomicFloat64MinMaxEXT = true,
58 .AtomicStorage = true,
59 .ClipDistance = true,
60 .ComputeDerivativeGroupLinearKHR = true,
61 .ComputeDerivativeGroupQuadsKHR = true,
62 .CooperativeMatrixKHR = true,
63 .CullDistance = true,
64 .DemoteToHelperInvocation = true,
65 .DenormFlushToZero = true,
66 .DenormPreserve = true,
67 .DerivativeControl = true,
68 .DeviceGroup = true,
69 .DotProduct = true,
70 .DotProductInput4x8Bit = true,
71 .DotProductInput4x8BitPacked = true,
72 .DotProductInputAll = true,
73 .DrawParameters = true,
74 .ExpectAssumeKHR = true,
75 .Float16 = true,
76 .Float16Buffer = true,
77 .Float64 = true,
78 .FloatControls2 = true,
79 .FragmentBarycentricKHR = true,
80 .FragmentDensityEXT = true,
81 .FragmentFullyCoveredEXT = true,
82 .FragmentMaskAMD = true,
83 .FragmentShaderPixelInterlockEXT = true,
84 .FragmentShaderSampleInterlockEXT = true,
85 .FragmentShadingRateKHR = true,
86 .GenericPointer = true,
87 .Geometry = true,
88 .GeometryPointSize = true,
89 .GeometryStreams = true,
90 .GroupNonUniform = true,
91 .GroupNonUniformArithmetic = true,
92 .GroupNonUniformBallot = true,
93 .GroupNonUniformClustered = true,
94 .GroupNonUniformQuad = true,
95 .GroupNonUniformRotateKHR = true,
96 .GroupNonUniformShuffle = true,
97 .GroupNonUniformShuffleRelative = true,
98 .GroupNonUniformVote = true,
99 .Groups = true,
100 .Image1D = true,
101 .ImageBasic = true,
102 .ImageBuffer = true,
103 .ImageCubeArray = true,
104 .ImageGatherBiasLodAMD = true,
105 .ImageGatherExtended = true,
106 .ImageMipmap = true,
107 .ImageMSArray = true,
108 .ImageQuery = true,
109 .ImageReadWrite = true,
110 .ImageReadWriteLodAMD = true,
111 .ImageRect = true,
112 .InputAttachment = true,
113 .InputAttachmentArrayDynamicIndexingEXT = true,
114 .InputAttachmentArrayNonUniformIndexingEXT = true,
115 .Int8 = true,
116 .Int16 = true,
117 .Int64 = true,
118 .Int64Atomics = true,
119 .Int64ImageEXT = true,
120 .IntegerFunctions2INTEL = true,
121 .InterpolationFunction = true,
122 .Kernel = true,
123 .Linkage = true,
124 .LiteralSampler = true,
125 .Matrix = true,
126 .MeshShadingEXT = true,
127 .MeshShadingNV = true,
128 .MinLod = true,
129 .MultiView = true,
130 .MultiViewport = true,
131 .OptNoneINTEL = true, // FIXME: make codegen emit the EXT name
132 .PerViewAttributesNV = true,
133 .PhysicalStorageBufferAddresses = true,
134 .QuadControlKHR = true,
135 .RayCullMaskKHR = true,
136 .RayQueryKHR = true,
137 .RayQueryPositionFetchKHR = true,
138 .RayTracingKHR = true,
139 .RayTracingPositionFetchKHR = true,
140 .RayTraversalPrimitiveCullingKHR = true,
141 .ReplicatedCompositesEXT = true,
142 .RoundingModeRTE = true,
143 .RoundingModeRTZ = true,
144 .RuntimeDescriptorArrayEXT = true,
145 .Sampled1D = true,
146 .SampledBuffer = true,
147 .SampledCubeArray = true,
148 .SampledImageArrayDynamicIndexing = true,
149 .SampledImageArrayNonUniformIndexingEXT = true,
150 .SampledRect = true,
151 .SampleMaskPostDepthCoverage = true,
152 .SampleRateShading = true,
153 .Shader = true,
154 .ShaderClockKHR = true,
155 .ShaderEnqueueAMDX = true,
156 .ShaderLayer = true,
157 .ShaderNonUniformEXT = true,
158 .ShaderSMBuiltinsNV = true,
159 .ShaderViewportIndex = true,
160 .ShaderViewportIndexLayerEXT = true,
161 .ShaderViewportMaskNV = true,
162 .SignedZeroInfNanPreserve = true,
163 .SparseResidency = true,
164 .StencilExportEXT = true,
165 .StorageBuffer8BitAccess = true,
166 .StorageBufferArrayDynamicIndexing = true,
167 .StorageBufferArrayNonUniformIndexingEXT = true,
168 .StorageImageArrayDynamicIndexing = true,
169 .StorageImageArrayNonUniformIndexingEXT = true,
170 .StorageImageExtendedFormats = true,
171 .StorageImageMultisample = true,
172 .StorageImageReadWithoutFormat = true,
173 .StorageImageWriteWithoutFormat = true,
174 .StorageInputOutput16 = true,
175 .StoragePushConstant8 = true,
176 .StoragePushConstant16 = true,
177 .StorageTexelBufferArrayDynamicIndexingEXT = true,
178 .StorageTexelBufferArrayNonUniformIndexingEXT = true,
179 .StorageUniform16 = true,
180 .StorageUniformBufferBlock16 = true,
181 .SubgroupBallotKHR = true,
182 .SubgroupBufferBlockIOINTEL = true,
183 .SubgroupShuffleINTEL = true,
184 .SubgroupVoteKHR = true,
185 .Tessellation = true,
186 .TessellationPointSize = true,
187 .TransformFeedback = true,
188 .UniformAndStorageBuffer8BitAccess = true,
189 .UniformBufferArrayDynamicIndexing = true,
190 .UniformBufferArrayNonUniformIndexingEXT = true,
191 .UniformTexelBufferArrayDynamicIndexingEXT = true,
192 .UniformTexelBufferArrayNonUniformIndexingEXT = true,
193 .VariablePointers = true,
194 .VariablePointersStorageBuffer = true,
195 .Vector16 = true,
196 .VulkanMemoryModel = true,
197 .VulkanMemoryModelDeviceScope = true,
198 .WorkgroupMemoryExplicitLayoutKHR = true,
199 .WorkgroupMemoryExplicitLayout8BitAccessKHR = true,
200 .WorkgroupMemoryExplicitLayout16BitAccessKHR = true,
201 };
202
203 uint32_t mesa_spirv_debug = 0;
204
205 static const struct debug_named_value mesa_spirv_debug_control[] = {
206 { "structured", MESA_SPIRV_DEBUG_STRUCTURED,
207 "Print information of the SPIR-V structured control flow parsing" },
208 { "values", MESA_SPIRV_DEBUG_VALUES,
209 "Print information of the SPIR-V values" },
210 { "asm", MESA_SPIRV_DEBUG_ASM, "Print the SPIR-V assembly" },
211 { "color", MESA_SPIRV_DEBUG_COLOR, "Debug in color, if available" },
212 DEBUG_NAMED_VALUE_END,
213 };
214
215 DEBUG_GET_ONCE_FLAGS_OPTION(mesa_spirv_debug, "MESA_SPIRV_DEBUG", mesa_spirv_debug_control, 0)
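/* Example (a sketch, not part of this file's logic): the flags above are read
 * once from the MESA_SPIRV_DEBUG environment variable using the names in
 * mesa_spirv_debug_control[], e.g.
 *
 *    MESA_SPIRV_DEBUG=structured,asm ./my_vulkan_app
 *
 * where "./my_vulkan_app" is a placeholder for whatever process ends up
 * calling spirv_to_nir().
 */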
216
217 /* DO NOT CALL THIS FUNCTION DIRECTLY. Use mesa_spirv_debug_init() instead */
218 static void
219 initialize_mesa_spirv_debug(void)
220 {
221 mesa_spirv_debug = debug_get_option_mesa_spirv_debug();
222 }
223
224 static void
225 mesa_spirv_debug_init(void)
226 {
227 static once_flag initialized_debug_flag = ONCE_FLAG_INIT;
228 call_once(&initialized_debug_flag, initialize_mesa_spirv_debug);
229 }
230
231 #ifndef NDEBUG
232 static enum nir_spirv_debug_level
233 vtn_default_log_level(void)
234 {
235 enum nir_spirv_debug_level level = NIR_SPIRV_DEBUG_LEVEL_WARNING;
236 const char *vtn_log_level_strings[] = {
237 [NIR_SPIRV_DEBUG_LEVEL_WARNING] = "warning",
238 [NIR_SPIRV_DEBUG_LEVEL_INFO] = "info",
239 [NIR_SPIRV_DEBUG_LEVEL_ERROR] = "error",
240 };
241 const char *str = getenv("MESA_SPIRV_LOG_LEVEL");
242
243 if (str == NULL)
244 return level;
245
246 for (int i = 0; i < ARRAY_SIZE(vtn_log_level_strings); i++) {
247 if (strcasecmp(str, vtn_log_level_strings[i]) == 0) {
248 level = i;
249 break;
250 }
251 }
252
253 return level;
254 }
255 #endif
256
257 void
258 vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level,
259 size_t spirv_offset, const char *message)
260 {
261 if (b->options->debug.func) {
262 b->options->debug.func(b->options->debug.private_data,
263 level, spirv_offset, message);
264 }
265
266 #ifndef NDEBUG
267 static enum nir_spirv_debug_level default_level =
268 NIR_SPIRV_DEBUG_LEVEL_INVALID;
269
270 if (default_level == NIR_SPIRV_DEBUG_LEVEL_INVALID)
271 default_level = vtn_default_log_level();
272
273 if (level >= default_level)
274 fprintf(stderr, "%s\n", message);
275 #endif
276 }
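/* A minimal sketch of a consumer-provided debug hook matching the call made
 * above. The option field names "debug.func" and "debug.private_data" come
 * from the code above; the callback name and body here are illustrative only:
 *
 *    static void
 *    my_spirv_debug_cb(void *private_data, enum nir_spirv_debug_level level,
 *                      size_t spirv_offset, const char *message)
 *    {
 *       fprintf(stderr, "[spirv @ %zu] %s\n", spirv_offset, message);
 *    }
 *
 *    // options.debug.func = my_spirv_debug_cb;
 *    // options.debug.private_data = NULL;
 */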
277
278 void
279 vtn_logf(struct vtn_builder *b, enum nir_spirv_debug_level level,
280 size_t spirv_offset, const char *fmt, ...)
281 {
282 va_list args;
283 char *msg;
284
285 va_start(args, fmt);
286 msg = ralloc_vasprintf(NULL, fmt, args);
287 va_end(args);
288
289 vtn_log(b, level, spirv_offset, msg);
290
291 ralloc_free(msg);
292 }
293
294 static void
295 vtn_log_err(struct vtn_builder *b,
296 enum nir_spirv_debug_level level, const char *prefix,
297 const char *file, unsigned line,
298 const char *fmt, va_list args)
299 {
300 char *msg;
301
302 msg = ralloc_strdup(NULL, prefix);
303
304 #ifndef NDEBUG
305 ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line);
306 #endif
307
308 ralloc_asprintf_append(&msg, " ");
309
310 ralloc_vasprintf_append(&msg, fmt, args);
311
312 ralloc_asprintf_append(&msg, "\n %zu bytes into the SPIR-V binary",
313 b->spirv_offset);
314
315 if (b->file) {
316 ralloc_asprintf_append(&msg,
317 "\n in SPIR-V source file %s, line %d, col %d",
318 b->file, b->line, b->col);
319 }
320
321 vtn_log(b, level, b->spirv_offset, msg);
322
323 ralloc_free(msg);
324 }
325
326 static void
327 vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
328 {
329 static int idx = 0;
330
331 char filename[1024];
332 int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv",
333 path, prefix, idx++);
334 if (len < 0 || len >= sizeof(filename))
335 return;
336
337 FILE *f = fopen(filename, "wb");
338 if (f == NULL)
339 return;
340
341 fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f);
342 fclose(f);
343
344 vtn_info("SPIR-V shader dumped to %s", filename);
345 }
346
347 void
348 _vtn_warn(struct vtn_builder *b, const char *file, unsigned line,
349 const char *fmt, ...)
350 {
351 va_list args;
352
353 va_start(args, fmt);
354 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_WARNING, "SPIR-V WARNING:\n",
355 file, line, fmt, args);
356 va_end(args);
357 }
358
359 void
360 _vtn_err(struct vtn_builder *b, const char *file, unsigned line,
361 const char *fmt, ...)
362 {
363 va_list args;
364
365 va_start(args, fmt);
366 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V ERROR:\n",
367 file, line, fmt, args);
368 va_end(args);
369 }
370
371 void
372 _vtn_fail(struct vtn_builder *b, const char *file, unsigned line,
373 const char *fmt, ...)
374 {
375 va_list args;
376
377 if (MESA_SPIRV_DEBUG(VALUES))
378 vtn_dump_values(b, stderr);
379
380 va_start(args, fmt);
381 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V parsing FAILED:\n",
382 file, line, fmt, args);
383 va_end(args);
384
385 const char *dump_path = secure_getenv("MESA_SPIRV_FAIL_DUMP_PATH");
386 if (dump_path)
387 vtn_dump_shader(b, dump_path, "fail");
388
389 #ifndef NDEBUG
390 if (!b->options->skip_os_break_in_debug_build)
391 os_break();
392 #endif
393
394 vtn_longjmp(b->fail_jump, 1);
395 }
396
397 const char *
398 vtn_value_type_to_string(enum vtn_value_type t)
399 {
400 #define CASE(typ) case vtn_value_type_##typ: return #typ
401 switch (t) {
402 CASE(invalid);
403 CASE(undef);
404 CASE(string);
405 CASE(decoration_group);
406 CASE(type);
407 CASE(constant);
408 CASE(pointer);
409 CASE(function);
410 CASE(block);
411 CASE(ssa);
412 CASE(extension);
413 CASE(image_pointer);
414 }
415 #undef CASE
416 unreachable("unknown value type");
417 return "UNKNOWN";
418 }
419
420 static const char *
421 vtn_base_type_to_string(enum vtn_base_type t)
422 {
423 #define CASE(typ) case vtn_base_type_##typ: return #typ
424 switch (t) {
425 CASE(void);
426 CASE(scalar);
427 CASE(vector);
428 CASE(matrix);
429 CASE(array);
430 CASE(struct);
431 CASE(pointer);
432 CASE(image);
433 CASE(sampler);
434 CASE(sampled_image);
435 CASE(accel_struct);
436 CASE(ray_query);
437 CASE(function);
438 CASE(event);
439 CASE(cooperative_matrix);
440 }
441 #undef CASE
442 unreachable("unknown base type");
443 return "UNKNOWN";
444 }
445
446
447 void
448 _vtn_fail_value_type_mismatch(struct vtn_builder *b, uint32_t value_id,
449 enum vtn_value_type value_type)
450 {
451 struct vtn_value *val = vtn_untyped_value(b, value_id);
452 vtn_fail(
453 "SPIR-V id %u is the wrong kind of value: "
454 "expected '%s' but got '%s'",
455 vtn_id_for_value(b, val),
456 vtn_value_type_to_string(value_type),
457 vtn_value_type_to_string(val->value_type));
458 }
459
460 void _vtn_fail_value_not_pointer(struct vtn_builder *b,
461 uint32_t value_id)
462 {
463 struct vtn_value *val = vtn_untyped_value(b, value_id);
464 vtn_fail("SPIR-V id %u is the wrong kind of value: "
465 "expected 'pointer' OR null constant but got "
466 "'%s' (%s)", value_id,
467 vtn_value_type_to_string(val->value_type),
468 val->is_null_constant ? "null constant" : "not null constant");
469 }
470
471 static struct vtn_ssa_value *
472 vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
473 {
474 struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
475 val->type = glsl_get_bare_type(type);
476
477 if (glsl_type_is_cmat(type)) {
478 nir_deref_instr *mat = vtn_create_cmat_temporary(b, type, "cmat_undef");
479 vtn_set_ssa_value_var(b, val, mat->var);
480 } else if (glsl_type_is_vector_or_scalar(type)) {
481 unsigned num_components = glsl_get_vector_elements(val->type);
482 unsigned bit_size = glsl_get_bit_size(val->type);
483 val->def = nir_undef(&b->nb, num_components, bit_size);
484 } else {
485 unsigned elems = glsl_get_length(val->type);
486 val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
487 if (glsl_type_is_array_or_matrix(type)) {
488 const struct glsl_type *elem_type = glsl_get_array_element(type);
489 for (unsigned i = 0; i < elems; i++)
490 val->elems[i] = vtn_undef_ssa_value(b, elem_type);
491 } else {
492 vtn_assert(glsl_type_is_struct_or_ifc(type));
493 for (unsigned i = 0; i < elems; i++) {
494 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
495 val->elems[i] = vtn_undef_ssa_value(b, elem_type);
496 }
497 }
498 }
499
500 return val;
501 }
502
503 struct vtn_ssa_value *
504 vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
505 const struct glsl_type *type)
506 {
507 struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
508 val->type = glsl_get_bare_type(type);
509
510 if (glsl_type_is_cmat(type)) {
511 const struct glsl_type *element_type = glsl_get_cmat_element(type);
512
513 nir_deref_instr *mat = vtn_create_cmat_temporary(b, type, "cmat_constant");
514 nir_cmat_construct(&b->nb, &mat->def,
515 nir_build_imm(&b->nb, 1, glsl_get_bit_size(element_type),
516 constant->values));
517 vtn_set_ssa_value_var(b, val, mat->var);
518 } else if (glsl_type_is_vector_or_scalar(type)) {
519 val->def = nir_build_imm(&b->nb, glsl_get_vector_elements(val->type),
520 glsl_get_bit_size(val->type),
521 constant->values);
522 } else {
523 unsigned elems = glsl_get_length(val->type);
524 val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
525 if (glsl_type_is_array_or_matrix(type)) {
526 const struct glsl_type *elem_type = glsl_get_array_element(type);
527 for (unsigned i = 0; i < elems; i++) {
528 val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
529 elem_type);
530 }
531 } else {
532 vtn_assert(glsl_type_is_struct_or_ifc(type));
533 for (unsigned i = 0; i < elems; i++) {
534 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
535 val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
536 elem_type);
537 }
538 }
539 }
540
541 return val;
542 }
543
544 struct vtn_ssa_value *
545 vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
546 {
547 struct vtn_value *val = vtn_untyped_value(b, value_id);
548 switch (val->value_type) {
549 case vtn_value_type_undef:
550 return vtn_undef_ssa_value(b, val->type->type);
551
552 case vtn_value_type_constant:
553 return vtn_const_ssa_value(b, val->constant, val->type->type);
554
555 case vtn_value_type_ssa:
556 return val->ssa;
557
558 case vtn_value_type_pointer:
559 vtn_assert(val->pointer->type && val->pointer->type->type);
560 struct vtn_ssa_value *ssa =
561 vtn_create_ssa_value(b, val->pointer->type->type);
562 ssa->def = vtn_pointer_to_ssa(b, val->pointer);
563 return ssa;
564
565 default:
566 vtn_fail("Invalid type for an SSA value");
567 }
568 }
569
570 struct vtn_value *
571 vtn_push_ssa_value(struct vtn_builder *b, uint32_t value_id,
572 struct vtn_ssa_value *ssa)
573 {
574 struct vtn_type *type = vtn_get_value_type(b, value_id);
575
576 /* See vtn_create_ssa_value */
577 vtn_fail_if(ssa->type != glsl_get_bare_type(type->type),
578 "Type mismatch for SPIR-V value %%%u", value_id);
579
580 struct vtn_value *val;
581 if (type->base_type == vtn_base_type_pointer) {
582 val = vtn_push_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type));
583 } else {
584 /* Don't trip the value_type_ssa check in vtn_push_value */
585 val = vtn_push_value(b, value_id, vtn_value_type_invalid);
586 val->value_type = vtn_value_type_ssa;
587 val->ssa = ssa;
588 }
589
590 return val;
591 }
592
593 nir_def *
594 vtn_get_nir_ssa(struct vtn_builder *b, uint32_t value_id)
595 {
596 struct vtn_ssa_value *ssa = vtn_ssa_value(b, value_id);
597 vtn_fail_if(!glsl_type_is_vector_or_scalar(ssa->type),
598 "Expected a vector or scalar type");
599 return ssa->def;
600 }
601
602 struct vtn_value *
603 vtn_push_nir_ssa(struct vtn_builder *b, uint32_t value_id, nir_def *def)
604 {
605 /* Types for all SPIR-V SSA values are set as part of a pre-pass so the
606 * type will be valid by the time we get here.
607 */
608 struct vtn_type *type = vtn_get_value_type(b, value_id);
609 vtn_fail_if(def->num_components != glsl_get_vector_elements(type->type) ||
610 def->bit_size != glsl_get_bit_size(type->type),
611 "Mismatch between NIR and SPIR-V type.");
612 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
613 ssa->def = def;
614 return vtn_push_ssa_value(b, value_id, ssa);
615 }
616
617 nir_deref_instr *
618 vtn_get_deref_for_id(struct vtn_builder *b, uint32_t value_id)
619 {
620 return vtn_get_deref_for_ssa_value(b, vtn_ssa_value(b, value_id));
621 }
622
623 nir_deref_instr *
624 vtn_get_deref_for_ssa_value(struct vtn_builder *b, struct vtn_ssa_value *ssa)
625 {
626 vtn_fail_if(!ssa->is_variable, "Expected an SSA value with a nir_variable");
627 return nir_build_deref_var(&b->nb, ssa->var);
628 }
629
630 struct vtn_value *
631 vtn_push_var_ssa(struct vtn_builder *b, uint32_t value_id, nir_variable *var)
632 {
633 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, var->type);
634 vtn_set_ssa_value_var(b, ssa, var);
635 return vtn_push_ssa_value(b, value_id, ssa);
636 }
637
638 static enum gl_access_qualifier
639 spirv_to_gl_access_qualifier(struct vtn_builder *b,
640 SpvAccessQualifier access_qualifier)
641 {
642 switch (access_qualifier) {
643 case SpvAccessQualifierReadOnly:
644 return ACCESS_NON_WRITEABLE;
645 case SpvAccessQualifierWriteOnly:
646 return ACCESS_NON_READABLE;
647 case SpvAccessQualifierReadWrite:
648 return 0;
649 default:
650 vtn_fail("Invalid image access qualifier");
651 }
652 }
653
654 static nir_deref_instr *
655 vtn_get_image(struct vtn_builder *b, uint32_t value_id,
656 enum gl_access_qualifier *access)
657 {
658 struct vtn_type *type = vtn_get_value_type(b, value_id);
659 vtn_assert(type->base_type == vtn_base_type_image);
660 if (access)
661 *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier);
662 nir_variable_mode mode = glsl_type_is_image(type->glsl_image) ?
663 nir_var_image : nir_var_uniform;
664 return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
665 mode, type->glsl_image, 0);
666 }
667
668 static void
669 vtn_push_image(struct vtn_builder *b, uint32_t value_id,
670 nir_deref_instr *deref, bool propagate_non_uniform)
671 {
672 struct vtn_type *type = vtn_get_value_type(b, value_id);
673 vtn_assert(type->base_type == vtn_base_type_image);
674 struct vtn_value *value = vtn_push_nir_ssa(b, value_id, &deref->def);
675 value->propagated_non_uniform = propagate_non_uniform;
676 }
677
678 static nir_deref_instr *
679 vtn_get_sampler(struct vtn_builder *b, uint32_t value_id)
680 {
681 struct vtn_type *type = vtn_get_value_type(b, value_id);
682 vtn_assert(type->base_type == vtn_base_type_sampler);
683 return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
684 nir_var_uniform, glsl_bare_sampler_type(), 0);
685 }
686
687 nir_def *
688 vtn_sampled_image_to_nir_ssa(struct vtn_builder *b,
689 struct vtn_sampled_image si)
690 {
691 return nir_vec2(&b->nb, &si.image->def, &si.sampler->def);
692 }
693
694 static void
695 vtn_push_sampled_image(struct vtn_builder *b, uint32_t value_id,
696 struct vtn_sampled_image si, bool propagate_non_uniform)
697 {
698 struct vtn_type *type = vtn_get_value_type(b, value_id);
699 vtn_assert(type->base_type == vtn_base_type_sampled_image);
700 struct vtn_value *value = vtn_push_nir_ssa(b, value_id,
701 vtn_sampled_image_to_nir_ssa(b, si));
702 value->propagated_non_uniform = propagate_non_uniform;
703 }
704
705 static struct vtn_sampled_image
706 vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id)
707 {
708 struct vtn_type *type = vtn_get_value_type(b, value_id);
709 vtn_assert(type->base_type == vtn_base_type_sampled_image);
710 nir_def *si_vec2 = vtn_get_nir_ssa(b, value_id);
711
712 /* Even though this is a sampled image, we can end up here with a storage
713 * image because OpenCL doesn't distinguish between the two.
714 */
715 const struct glsl_type *image_type = type->image->glsl_image;
716 nir_variable_mode image_mode = glsl_type_is_image(image_type) ?
717 nir_var_image : nir_var_uniform;
718
719 struct vtn_sampled_image si = { NULL, };
720 si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0),
721 image_mode, image_type, 0);
722 si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1),
723 nir_var_uniform,
724 glsl_bare_sampler_type(), 0);
725 return si;
726 }
727
728 const char *
729 vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
730 unsigned word_count, unsigned *words_used)
731 {
732 /* From the SPIR-V spec:
733 *
734 * "A string is interpreted as a nul-terminated stream of characters.
735 * The character set is Unicode in the UTF-8 encoding scheme. The UTF-8
736 * octets (8-bit bytes) are packed four per word, following the
737 * little-endian convention (i.e., the first octet is in the
738 * lowest-order 8 bits of the word). The final word contains the
739 * string’s nul-termination character (0), and all contents past the
740 * end of the string in the final word are padded with 0."
741 *
742 * On big-endian, we need to byte-swap.
743 */
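/* Worked example of the packing described above: the literal "abc" occupies
 * one word with value 0x00636261 ('a' = 0x61 in the lowest-order byte,
 * 'b' = 0x62, 'c' = 0x63), and the highest byte holds the nul terminator.
 */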
744 #if UTIL_ARCH_BIG_ENDIAN
745 {
746 uint32_t *copy = vtn_alloc_array(b, uint32_t, word_count);
747 for (unsigned i = 0; i < word_count; i++)
748 copy[i] = util_bswap32(words[i]);
749 words = copy;
750 }
751 #endif
752
753 const char *str = (const char *)words;
754 const char *end = memchr(str, 0, word_count * 4);
755 vtn_fail_if(end == NULL, "String is not null-terminated");
756
757 if (words_used)
758 *words_used = DIV_ROUND_UP(end - str + 1, sizeof(*words));
759
760 return str;
761 }
762
763 const uint32_t *
764 vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
765 const uint32_t *end, vtn_instruction_handler handler)
766 {
767 const uint32_t *w = start;
768 while (w < end) {
769 SpvOp opcode = w[0] & SpvOpCodeMask;
770 unsigned count = w[0] >> SpvWordCountShift;
771 vtn_assert(count >= 1 && w + count <= end);
772
773 b->spirv_offset = (uint8_t *)w - (uint8_t *)b->spirv;
774
775 switch (opcode) {
776 case SpvOpNop:
777 break; /* Do nothing */
778
779 case SpvOpLine:
780 b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
781 b->line = w[2];
782 b->col = w[3];
783 break;
784
785 case SpvOpNoLine:
786 b->file = NULL;
787 b->line = -1;
788 b->col = -1;
789 break;
790
791 default:
792 if (!handler(b, opcode, w, count))
793 return w;
794 break;
795 }
796
797 w += count;
798 }
799
800 assert(w == end);
801 return w;
802 }
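/* Worked example of the word decoding above: a first word of 0x0004003B has
 * a word count of 4 in its upper 16 bits and opcode 0x3B (59, SpvOpVariable)
 * in its lower 16 bits, so the handler is called with count == 4 and w
 * pointing at that word.
 */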
803
804 static bool
805 vtn_handle_non_semantic_instruction(struct vtn_builder *b, SpvOp ext_opcode,
806 const uint32_t *w, unsigned count)
807 {
808 /* Do nothing. */
809 return true;
810 }
811
812 static bool
813 vtn_handle_non_semantic_debug_break_instruction(struct vtn_builder *b, SpvOp ext_opcode,
814 const uint32_t *w, unsigned count)
815 {
816 nir_debug_break(&b->nb);
817 return true;
818 }
819
820 static void
821 vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
822 const uint32_t *w, unsigned count)
823 {
824 switch (opcode) {
825 case SpvOpExtInstImport: {
826 struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
827 const char *ext = vtn_string_literal(b, &w[2], count - 2, NULL);
828 if (strcmp(ext, "GLSL.std.450") == 0) {
829 val->ext_handler = vtn_handle_glsl450_instruction;
830 } else if ((strcmp(ext, "SPV_AMD_gcn_shader") == 0)
831 && (b->options && b->options->amd_gcn_shader)) {
832 val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
833 } else if ((strcmp(ext, "SPV_AMD_shader_ballot") == 0)
834 && (b->options && b->options->amd_shader_ballot)) {
835 val->ext_handler = vtn_handle_amd_shader_ballot_instruction;
836 } else if ((strcmp(ext, "SPV_AMD_shader_trinary_minmax") == 0)
837 && (b->options && b->options->amd_trinary_minmax)) {
838 val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
839 } else if ((strcmp(ext, "SPV_AMD_shader_explicit_vertex_parameter") == 0)
840 && (b->options && b->options->amd_shader_explicit_vertex_parameter)) {
841 val->ext_handler = vtn_handle_amd_shader_explicit_vertex_parameter_instruction;
842 } else if (strcmp(ext, "OpenCL.std") == 0) {
843 val->ext_handler = vtn_handle_opencl_instruction;
844 } else if ((strcmp(ext, "NonSemantic.DebugBreak") == 0)
845 && (b->options && b->options->emit_debug_break)) {
846 val->ext_handler = vtn_handle_non_semantic_debug_break_instruction;
847 } else if (strstr(ext, "NonSemantic.") == ext) {
848 val->ext_handler = vtn_handle_non_semantic_instruction;
849 } else {
850 vtn_fail("Unsupported extension: %s", ext);
851 }
852 break;
853 }
854
855 case SpvOpExtInst:
856 case SpvOpExtInstWithForwardRefsKHR: {
857 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
858
859 if (opcode == SpvOpExtInstWithForwardRefsKHR)
860 assert(val->ext_handler == vtn_handle_non_semantic_instruction);
861
862 bool handled = val->ext_handler(b, w[4], w, count);
863 vtn_assert(handled);
864 break;
865 }
866
867 default:
868 vtn_fail_with_opcode("Unhandled opcode", opcode);
869 }
870 }
871
872 static void
873 _foreach_decoration_helper(struct vtn_builder *b,
874 struct vtn_value *base_value,
875 int parent_member,
876 struct vtn_value *value,
877 vtn_decoration_foreach_cb cb, void *data)
878 {
879 for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
880 int member;
881 if (dec->scope == VTN_DEC_DECORATION) {
882 member = parent_member;
883 } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
884 vtn_fail_if(value->value_type != vtn_value_type_type ||
885 value->type->base_type != vtn_base_type_struct,
886 "OpMemberDecorate and OpGroupMemberDecorate are only "
887 "allowed on OpTypeStruct");
888 /* This means we haven't recursed yet */
889 assert(value == base_value);
890
891 member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
892
893 vtn_fail_if(member >= base_value->type->length,
894 "OpMemberDecorate specifies member %d but the "
895 "OpTypeStruct has only %u members",
896 member, base_value->type->length);
897 } else {
898 /* Not a decoration */
899 assert(dec->scope == VTN_DEC_EXECUTION_MODE ||
900 dec->scope <= VTN_DEC_STRUCT_MEMBER_NAME0);
901 continue;
902 }
903
904 if (dec->group) {
905 assert(dec->group->value_type == vtn_value_type_decoration_group);
906 _foreach_decoration_helper(b, base_value, member, dec->group,
907 cb, data);
908 } else {
909 cb(b, base_value, member, dec, data);
910 }
911 }
912 }
913
914 /** Iterates (recursively if needed) over all of the decorations on a value
915 *
916 * This function iterates over all of the decorations applied to a given
917 * value. If it encounters a decoration group, it recurses into the group
918 * and iterates over all of those decorations as well.
919 */
920 void
921 vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
922 vtn_decoration_foreach_cb cb, void *data)
923 {
924 _foreach_decoration_helper(b, value, -1, value, cb, data);
925 }
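/* A minimal usage sketch (the callback name and the counted decoration are
 * illustrative only; the signature matches the callback invocation used
 * above):
 *
 *    static void
 *    count_builtins_cb(struct vtn_builder *b, struct vtn_value *val,
 *                      int member, const struct vtn_decoration *dec,
 *                      void *data)
 *    {
 *       if (dec->decoration == SpvDecorationBuiltIn)
 *          (*(unsigned *)data)++;
 *    }
 *
 *    // unsigned num_builtins = 0;
 *    // vtn_foreach_decoration(b, val, count_builtins_cb, &num_builtins);
 */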
926
927 void
928 vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
929 vtn_execution_mode_foreach_cb cb, void *data)
930 {
931 for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
932 if (dec->scope != VTN_DEC_EXECUTION_MODE)
933 continue;
934
935 assert(dec->group == NULL);
936 cb(b, value, dec, data);
937 }
938 }
939
940 void
941 vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
942 const uint32_t *w, unsigned count)
943 {
944 const uint32_t *w_end = w + count;
945 const uint32_t target = w[1];
946 w += 2;
947
948 switch (opcode) {
949 case SpvOpDecorationGroup:
950 vtn_push_value(b, target, vtn_value_type_decoration_group);
951 break;
952
953 case SpvOpDecorate:
954 case SpvOpDecorateId:
955 case SpvOpMemberDecorate:
956 case SpvOpDecorateString:
957 case SpvOpMemberDecorateString:
958 case SpvOpExecutionMode:
959 case SpvOpExecutionModeId: {
960 struct vtn_value *val = vtn_untyped_value(b, target);
961
962 struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
963 switch (opcode) {
964 case SpvOpDecorate:
965 case SpvOpDecorateId:
966 case SpvOpDecorateString:
967 dec->scope = VTN_DEC_DECORATION;
968 break;
969 case SpvOpMemberDecorate:
970 case SpvOpMemberDecorateString:
971 dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
972 vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */
973 "Member argument of OpMemberDecorate too large");
974 break;
975 case SpvOpExecutionMode:
976 case SpvOpExecutionModeId:
977 dec->scope = VTN_DEC_EXECUTION_MODE;
978 break;
979 default:
980 unreachable("Invalid decoration opcode");
981 }
982 dec->decoration = *(w++);
983 dec->num_operands = w_end - w;
984 dec->operands = w;
985
986 /* Link into the list */
987 dec->next = val->decoration;
988 val->decoration = dec;
989 break;
990 }
991
992 case SpvOpMemberName: {
993 struct vtn_value *val = vtn_untyped_value(b, target);
994 struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
995
996 dec->scope = VTN_DEC_STRUCT_MEMBER_NAME0 - *(w++);
997
998 dec->member_name = vtn_string_literal(b, w, w_end - w, NULL);
999
1000 dec->next = val->decoration;
1001 val->decoration = dec;
1002 break;
1003 }
1004
1005 case SpvOpGroupMemberDecorate:
1006 case SpvOpGroupDecorate: {
1007 struct vtn_value *group =
1008 vtn_value(b, target, vtn_value_type_decoration_group);
1009
1010 for (; w < w_end; w++) {
1011 struct vtn_value *val = vtn_untyped_value(b, *w);
1012 struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
1013
1014 dec->group = group;
1015 if (opcode == SpvOpGroupDecorate) {
1016 dec->scope = VTN_DEC_DECORATION;
1017 } else {
1018 dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
1019 vtn_fail_if(dec->scope < 0, /* Check for overflow */
1020 "Member argument of OpGroupMemberDecorate too large");
1021 }
1022
1023 /* Link into the list */
1024 dec->next = val->decoration;
1025 val->decoration = dec;
1026 }
1027 break;
1028 }
1029
1030 default:
1031 unreachable("Unhandled opcode");
1032 }
1033 }
1034
1035 struct member_decoration_ctx {
1036 unsigned num_fields;
1037 struct glsl_struct_field *fields;
1038 struct vtn_type *type;
1039 };
1040
1041 /**
1042 * Returns true if the given type contains a struct decorated Block or
1043 * BufferBlock
1044 */
1045 bool
1046 vtn_type_contains_block(struct vtn_builder *b, struct vtn_type *type)
1047 {
1048 switch (type->base_type) {
1049 case vtn_base_type_array:
1050 return vtn_type_contains_block(b, type->array_element);
1051 case vtn_base_type_struct:
1052 if (type->block || type->buffer_block)
1053 return true;
1054 for (unsigned i = 0; i < type->length; i++) {
1055 if (vtn_type_contains_block(b, type->members[i]))
1056 return true;
1057 }
1058 return false;
1059 default:
1060 return false;
1061 }
1062 }
1063
1064 /** Returns true if two types are "compatible", i.e. you can do an OpLoad,
1065 * OpStore, or OpCopyMemory between them without breaking anything.
1066 * Technically, the SPIR-V rules require the exact same type ID but this lets
1067 * us internally be a bit looser.
1068 */
1069 bool
1070 vtn_types_compatible(struct vtn_builder *b,
1071 struct vtn_type *t1, struct vtn_type *t2)
1072 {
1073 if (t1->id == t2->id)
1074 return true;
1075
1076 if (t1->base_type != t2->base_type)
1077 return false;
1078
1079 switch (t1->base_type) {
1080 case vtn_base_type_void:
1081 case vtn_base_type_scalar:
1082 case vtn_base_type_vector:
1083 case vtn_base_type_matrix:
1084 case vtn_base_type_image:
1085 case vtn_base_type_sampler:
1086 case vtn_base_type_sampled_image:
1087 case vtn_base_type_event:
1088 case vtn_base_type_cooperative_matrix:
1089 return t1->type == t2->type;
1090
1091 case vtn_base_type_array:
1092 return t1->length == t2->length &&
1093 vtn_types_compatible(b, t1->array_element, t2->array_element);
1094
1095 case vtn_base_type_pointer:
1096 return vtn_types_compatible(b, t1->pointed, t2->pointed);
1097
1098 case vtn_base_type_struct:
1099 if (t1->length != t2->length)
1100 return false;
1101
1102 for (unsigned i = 0; i < t1->length; i++) {
1103 if (!vtn_types_compatible(b, t1->members[i], t2->members[i]))
1104 return false;
1105 }
1106 return true;
1107
1108 case vtn_base_type_accel_struct:
1109 case vtn_base_type_ray_query:
1110 return true;
1111
1112 case vtn_base_type_function:
1113 /* This case shouldn't get hit since you can't copy around function
1114 * types. Just require them to be identical.
1115 */
1116 return false;
1117 }
1118
1119 vtn_fail("Invalid base type");
1120 }
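/* For example, two OpTypeArray results with distinct SPIR-V ids but the same
 * length and compatible element types are considered compatible here, which
 * is exactly the looseness described above; two structs only match if they
 * agree member by member.
 */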
1121
1122 struct vtn_type *
1123 vtn_type_without_array(struct vtn_type *type)
1124 {
1125 while (type->base_type == vtn_base_type_array)
1126 type = type->array_element;
1127 return type;
1128 }
1129
1130 /* Does a shallow copy of a vtn_type. */
1131
1132 static struct vtn_type *
1133 vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
1134 {
1135 struct vtn_type *dest = vtn_alloc(b, struct vtn_type);
1136 *dest = *src;
1137
1138 switch (src->base_type) {
1139 case vtn_base_type_void:
1140 case vtn_base_type_scalar:
1141 case vtn_base_type_vector:
1142 case vtn_base_type_matrix:
1143 case vtn_base_type_array:
1144 case vtn_base_type_pointer:
1145 case vtn_base_type_image:
1146 case vtn_base_type_sampler:
1147 case vtn_base_type_sampled_image:
1148 case vtn_base_type_event:
1149 case vtn_base_type_accel_struct:
1150 case vtn_base_type_ray_query:
1151 case vtn_base_type_cooperative_matrix:
1152 /* Nothing more to do */
1153 break;
1154
1155 case vtn_base_type_struct:
1156 dest->members = vtn_alloc_array(b, struct vtn_type *, src->length);
1157 memcpy(dest->members, src->members,
1158 src->length * sizeof(src->members[0]));
1159
1160 dest->offsets = vtn_alloc_array(b, unsigned, src->length);
1161 memcpy(dest->offsets, src->offsets,
1162 src->length * sizeof(src->offsets[0]));
1163 break;
1164
1165 case vtn_base_type_function:
1166 dest->params = vtn_alloc_array(b, struct vtn_type *, src->length);
1167 memcpy(dest->params, src->params, src->length * sizeof(src->params[0]));
1168 break;
1169 }
1170
1171 return dest;
1172 }
1173
1174 static bool
1175 vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type,
1176 enum vtn_variable_mode mode)
1177 {
1178 /* For OpenCL we never want to strip the info from the types, and it makes
1179 * type comparisons easier in later stages.
1180 */
1181 if (b->options->environment == NIR_SPIRV_OPENCL)
1182 return true;
1183
1184 switch (mode) {
1185 case vtn_variable_mode_input:
1186 case vtn_variable_mode_output:
1187 /* Layout decorations are kept because we need offsets for XFB arrays of
1188 * blocks.
1189 */
1190 return b->shader->info.has_transform_feedback_varyings;
1191
1192 case vtn_variable_mode_ssbo:
1193 case vtn_variable_mode_phys_ssbo:
1194 case vtn_variable_mode_ubo:
1195 case vtn_variable_mode_push_constant:
1196 case vtn_variable_mode_shader_record:
1197 return true;
1198
1199 case vtn_variable_mode_workgroup:
1200 return b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR;
1201
1202 default:
1203 return false;
1204 }
1205 }
1206
1207 const struct glsl_type *
1208 vtn_type_get_nir_type(struct vtn_builder *b, struct vtn_type *type,
1209 enum vtn_variable_mode mode)
1210 {
1211 if (mode == vtn_variable_mode_atomic_counter) {
1212 vtn_fail_if(glsl_without_array(type->type) != glsl_uint_type(),
1213 "Variables in the AtomicCounter storage class should be "
1214 "(possibly arrays of arrays of) uint.");
1215 return glsl_type_wrap_in_arrays(glsl_atomic_uint_type(), type->type);
1216 }
1217
1218 if (mode == vtn_variable_mode_uniform) {
1219 switch (type->base_type) {
1220 case vtn_base_type_array: {
1221 const struct glsl_type *elem_type =
1222 vtn_type_get_nir_type(b, type->array_element, mode);
1223
1224 return glsl_array_type(elem_type, type->length,
1225 glsl_get_explicit_stride(type->type));
1226 }
1227
1228 case vtn_base_type_struct: {
1229 bool need_new_struct = false;
1230 const uint32_t num_fields = type->length;
1231 NIR_VLA(struct glsl_struct_field, fields, num_fields);
1232 for (unsigned i = 0; i < num_fields; i++) {
1233 fields[i] = *glsl_get_struct_field_data(type->type, i);
1234 const struct glsl_type *field_nir_type =
1235 vtn_type_get_nir_type(b, type->members[i], mode);
1236 if (fields[i].type != field_nir_type) {
1237 fields[i].type = field_nir_type;
1238 need_new_struct = true;
1239 }
1240 }
1241 if (need_new_struct) {
1242 if (glsl_type_is_interface(type->type)) {
1243 return glsl_interface_type(fields, num_fields,
1244 /* packing */ 0, false,
1245 glsl_get_type_name(type->type));
1246 } else {
1247 return glsl_struct_type(fields, num_fields,
1248 glsl_get_type_name(type->type),
1249 glsl_struct_type_is_packed(type->type));
1250 }
1251 } else {
1252 /* No changes, just pass it on */
1253 return type->type;
1254 }
1255 }
1256
1257 case vtn_base_type_image:
1258 vtn_assert(glsl_type_is_texture(type->glsl_image));
1259 return type->glsl_image;
1260
1261 case vtn_base_type_sampler:
1262 return glsl_bare_sampler_type();
1263
1264 case vtn_base_type_sampled_image:
1265 return glsl_texture_type_to_sampler(type->image->glsl_image,
1266 false /* is_shadow */);
1267
1268 default:
1269 return type->type;
1270 }
1271 }
1272
1273 if (mode == vtn_variable_mode_image) {
1274 struct vtn_type *image_type = vtn_type_without_array(type);
1275 vtn_assert(image_type->base_type == vtn_base_type_image);
1276 return glsl_type_wrap_in_arrays(image_type->glsl_image, type->type);
1277 }
1278
1279 /* Layout decorations are allowed but ignored in certain conditions,
1280 * to allow SPIR-V generators to perform type deduplication. Discard
1281 * unnecessary ones when passing to NIR.
1282 */
1283 if (!vtn_type_needs_explicit_layout(b, type, mode))
1284 return glsl_get_bare_type(type->type);
1285
1286 return type->type;
1287 }
1288
1289 static struct vtn_type *
1290 mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
1291 {
1292 type->members[member] = vtn_type_copy(b, type->members[member]);
1293 type = type->members[member];
1294
1295 /* We may have an array of matrices.... Oh, joy! */
1296 while (glsl_type_is_array(type->type)) {
1297 type->array_element = vtn_type_copy(b, type->array_element);
1298 type = type->array_element;
1299 }
1300
1301 vtn_assert(glsl_type_is_matrix(type->type));
1302
1303 return type;
1304 }
1305
1306 static void
1307 vtn_handle_access_qualifier(struct vtn_builder *b, struct vtn_type *type,
1308 int member, enum gl_access_qualifier access)
1309 {
1310 type->members[member] = vtn_type_copy(b, type->members[member]);
1311 type = type->members[member];
1312
1313 type->access |= access;
1314 }
1315
1316 static void
1317 array_stride_decoration_cb(struct vtn_builder *b,
1318 struct vtn_value *val, int member,
1319 const struct vtn_decoration *dec, void *void_ctx)
1320 {
1321 struct vtn_type *type = val->type;
1322
1323 if (dec->decoration == SpvDecorationArrayStride) {
1324 if (vtn_type_contains_block(b, type)) {
1325 vtn_warn("The ArrayStride decoration cannot be applied to an array "
1326 "type which contains a structure type decorated Block "
1327 "or BufferBlock");
1328 /* Ignore the decoration */
1329 } else {
1330 vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero");
1331 type->stride = dec->operands[0];
1332 }
1333 }
1334 }
1335
1336 static void
1337 struct_member_decoration_cb(struct vtn_builder *b,
1338 UNUSED struct vtn_value *val, int member,
1339 const struct vtn_decoration *dec, void *void_ctx)
1340 {
1341 struct member_decoration_ctx *ctx = void_ctx;
1342
1343 if (member < 0)
1344 return;
1345
1346 assert(member < ctx->num_fields);
1347
1348 switch (dec->decoration) {
1349 case SpvDecorationRelaxedPrecision:
1350 case SpvDecorationUniform:
1351 case SpvDecorationUniformId:
1352 break; /* FIXME: Do nothing with this for now. */
1353 case SpvDecorationNonWritable:
1354 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_WRITEABLE);
1355 break;
1356 case SpvDecorationNonReadable:
1357 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_READABLE);
1358 break;
1359 case SpvDecorationVolatile:
1360 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_VOLATILE);
1361 break;
1362 case SpvDecorationCoherent:
1363 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_COHERENT);
1364 break;
1365 case SpvDecorationNoPerspective:
1366 ctx->fields[member].interpolation = INTERP_MODE_NOPERSPECTIVE;
1367 break;
1368 case SpvDecorationFlat:
1369 ctx->fields[member].interpolation = INTERP_MODE_FLAT;
1370 break;
1371 case SpvDecorationExplicitInterpAMD:
1372 ctx->fields[member].interpolation = INTERP_MODE_EXPLICIT;
1373 break;
1374 case SpvDecorationCentroid:
1375 ctx->fields[member].centroid = true;
1376 break;
1377 case SpvDecorationSample:
1378 ctx->fields[member].sample = true;
1379 break;
1380 case SpvDecorationStream:
1381 /* This is handled later by var_decoration_cb in vtn_variables.c */
1382 break;
1383 case SpvDecorationLocation:
1384 ctx->fields[member].location = dec->operands[0];
1385 break;
1386 case SpvDecorationComponent:
1387 break; /* FIXME: What should we do with these? */
1388 case SpvDecorationBuiltIn:
1389 ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
1390 ctx->type->members[member]->is_builtin = true;
1391 ctx->type->members[member]->builtin = dec->operands[0];
1392 ctx->type->builtin_block = true;
1393 break;
1394 case SpvDecorationOffset:
1395 ctx->type->offsets[member] = dec->operands[0];
1396 ctx->fields[member].offset = dec->operands[0];
1397 break;
1398 case SpvDecorationMatrixStride:
1399 /* Handled as a second pass */
1400 break;
1401 case SpvDecorationColMajor:
1402 break; /* Nothing to do here. Column-major is the default. */
1403 case SpvDecorationRowMajor:
1404 mutable_matrix_member(b, ctx->type, member)->row_major = true;
1405 break;
1406
1407 case SpvDecorationPatch:
1408 case SpvDecorationPerPrimitiveNV:
1409 case SpvDecorationPerTaskNV:
1410 case SpvDecorationPerViewNV:
1411 break;
1412
1413 case SpvDecorationSpecId:
1414 case SpvDecorationBlock:
1415 case SpvDecorationBufferBlock:
1416 case SpvDecorationArrayStride:
1417 case SpvDecorationGLSLShared:
1418 case SpvDecorationGLSLPacked:
1419 case SpvDecorationAliased:
1420 case SpvDecorationConstant:
1421 case SpvDecorationIndex:
1422 case SpvDecorationBinding:
1423 case SpvDecorationDescriptorSet:
1424 case SpvDecorationLinkageAttributes:
1425 case SpvDecorationNoContraction:
1426 case SpvDecorationInputAttachmentIndex:
1427 case SpvDecorationCPacked:
1428 vtn_warn("Decoration not allowed on struct members: %s",
1429 spirv_decoration_to_string(dec->decoration));
1430 break;
1431
1432 case SpvDecorationRestrict:
1433 /* While "Restrict" is invalid for struct members, glslang incorrectly
1434 * generates it and it ends up hiding actual driver issues in a wall of
1435 * spam from deqp-vk. Return it to the above block once the issue is
1436 * resolved. https://github.com/KhronosGroup/glslang/issues/703
1437 */
1438 break;
1439
1440 case SpvDecorationInvariant:
1441 /* Also incorrectly generated by glslang, ignore it. */
1442 break;
1443
1444 case SpvDecorationXfbBuffer:
1445 case SpvDecorationXfbStride:
1446 /* This is handled later by var_decoration_cb in vtn_variables.c */
1447 break;
1448
1449 case SpvDecorationSaturatedConversion:
1450 case SpvDecorationFuncParamAttr:
1451 case SpvDecorationFPRoundingMode:
1452 case SpvDecorationAlignment:
1453 if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1454 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1455 spirv_decoration_to_string(dec->decoration));
1456 }
1457 break;
1458
1459 case SpvDecorationFPFastMathMode:
1460 /* See handle_fp_fast_math(). */
1461 break;
1462
1463 case SpvDecorationUserSemantic:
1464 case SpvDecorationUserTypeGOOGLE:
1465 /* User semantic decorations can safely be ignored by the driver. */
1466 break;
1467
1468 default:
1469 vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1470 }
1471 }
1472
1473 /** Chases the array type all the way down to the tail and rewrites the
1474 * glsl_types to be based off the tail's glsl_type.
1475 */
1476 static void
1477 vtn_array_type_rewrite_glsl_type(struct vtn_type *type)
1478 {
1479 if (type->base_type != vtn_base_type_array)
1480 return;
1481
1482 vtn_array_type_rewrite_glsl_type(type->array_element);
1483
1484 type->type = glsl_array_type(type->array_element->type,
1485 type->length, type->stride);
1486 }
1487
1488 /* Matrix strides are handled as a separate pass because we need to know
1489 * whether the matrix is row-major or not first.
1490 */
1491 static void
1492 struct_member_matrix_stride_cb(struct vtn_builder *b,
1493 UNUSED struct vtn_value *val, int member,
1494 const struct vtn_decoration *dec,
1495 void *void_ctx)
1496 {
1497 if (dec->decoration != SpvDecorationMatrixStride)
1498 return;
1499
1500 vtn_fail_if(member < 0,
1501 "The MatrixStride decoration is only allowed on members "
1502 "of OpTypeStruct");
1503 vtn_fail_if(dec->operands[0] == 0, "MatrixStride must be non-zero");
1504
1505 struct member_decoration_ctx *ctx = void_ctx;
1506
1507 struct vtn_type *mat_type = mutable_matrix_member(b, ctx->type, member);
1508 if (mat_type->row_major) {
1509 mat_type->array_element = vtn_type_copy(b, mat_type->array_element);
1510 mat_type->stride = mat_type->array_element->stride;
1511 mat_type->array_element->stride = dec->operands[0];
1512
1513 mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1514 dec->operands[0], true);
1515 mat_type->array_element->type = glsl_get_column_type(mat_type->type);
1516 } else {
1517 vtn_assert(mat_type->array_element->stride > 0);
1518 mat_type->stride = dec->operands[0];
1519
1520 mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1521 dec->operands[0], false);
1522 }
1523
1524 /* Now that we've replaced the glsl_type with a properly strided matrix
1525 * type, rewrite the member type so that it's an array of the proper kind
1526 * of glsl_type.
1527 */
1528 vtn_array_type_rewrite_glsl_type(ctx->type->members[member]);
1529 ctx->fields[member].type = ctx->type->members[member]->type;
1530 }
1531
1532 static void
1533 struct_packed_decoration_cb(struct vtn_builder *b,
1534 struct vtn_value *val, int member,
1535 const struct vtn_decoration *dec, void *void_ctx)
1536 {
1537 vtn_assert(val->type->base_type == vtn_base_type_struct);
1538 if (dec->decoration == SpvDecorationCPacked) {
1539 if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1540 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1541 spirv_decoration_to_string(dec->decoration));
1542 }
1543 val->type->packed = true;
1544 }
1545 }
1546
1547 static void
1548 struct_block_decoration_cb(struct vtn_builder *b,
1549 struct vtn_value *val, int member,
1550 const struct vtn_decoration *dec, void *ctx)
1551 {
1552 if (member != -1)
1553 return;
1554
1555 struct vtn_type *type = val->type;
1556 if (dec->decoration == SpvDecorationBlock)
1557 type->block = true;
1558 else if (dec->decoration == SpvDecorationBufferBlock)
1559 type->buffer_block = true;
1560 }
1561
1562 static void
1563 type_decoration_cb(struct vtn_builder *b,
1564 struct vtn_value *val, int member,
1565 const struct vtn_decoration *dec, UNUSED void *ctx)
1566 {
1567 struct vtn_type *type = val->type;
1568
1569 if (member != -1) {
1570 /* This should have been handled by OpTypeStruct */
1571 assert(val->type->base_type == vtn_base_type_struct);
1572 assert(member >= 0 && member < val->type->length);
1573 return;
1574 }
1575
1576 switch (dec->decoration) {
1577 case SpvDecorationArrayStride:
1578 vtn_assert(type->base_type == vtn_base_type_array ||
1579 type->base_type == vtn_base_type_pointer);
1580 break;
1581 case SpvDecorationBlock:
1582 vtn_assert(type->base_type == vtn_base_type_struct);
1583 vtn_assert(type->block);
1584 break;
1585 case SpvDecorationBufferBlock:
1586 vtn_assert(type->base_type == vtn_base_type_struct);
1587 vtn_assert(type->buffer_block);
1588 break;
1589 case SpvDecorationGLSLShared:
1590 case SpvDecorationGLSLPacked:
1591 /* Ignore these, since we get explicit offsets anyways */
1592 break;
1593
1594 case SpvDecorationRowMajor:
1595 case SpvDecorationColMajor:
1596 case SpvDecorationMatrixStride:
1597 case SpvDecorationBuiltIn:
1598 case SpvDecorationNoPerspective:
1599 case SpvDecorationFlat:
1600 case SpvDecorationPatch:
1601 case SpvDecorationCentroid:
1602 case SpvDecorationSample:
1603 case SpvDecorationExplicitInterpAMD:
1604 case SpvDecorationVolatile:
1605 case SpvDecorationCoherent:
1606 case SpvDecorationNonWritable:
1607 case SpvDecorationNonReadable:
1608 case SpvDecorationUniform:
1609 case SpvDecorationUniformId:
1610 case SpvDecorationLocation:
1611 case SpvDecorationComponent:
1612 case SpvDecorationOffset:
1613 case SpvDecorationXfbBuffer:
1614 case SpvDecorationXfbStride:
1615 case SpvDecorationUserSemantic:
1616 vtn_warn("Decoration only allowed for struct members: %s",
1617 spirv_decoration_to_string(dec->decoration));
1618 break;
1619
1620 case SpvDecorationStream:
1621 /* We don't need to do anything here: the stream is filled in when
1622 * applying the decoration to a variable. When the decoration is not on a
1623 * struct member, just check that the target is a struct.
1624 */
1625 vtn_assert(type->base_type == vtn_base_type_struct);
1626 break;
1627
1628 case SpvDecorationRelaxedPrecision:
1629 case SpvDecorationSpecId:
1630 case SpvDecorationInvariant:
1631 case SpvDecorationRestrict:
1632 case SpvDecorationAliased:
1633 case SpvDecorationConstant:
1634 case SpvDecorationIndex:
1635 case SpvDecorationBinding:
1636 case SpvDecorationDescriptorSet:
1637 case SpvDecorationLinkageAttributes:
1638 case SpvDecorationNoContraction:
1639 case SpvDecorationInputAttachmentIndex:
1640 vtn_warn("Decoration not allowed on types: %s",
1641 spirv_decoration_to_string(dec->decoration));
1642 break;
1643
1644 case SpvDecorationCPacked:
1645 /* Handled when parsing a struct type, nothing to do here. */
1646 break;
1647
1648 case SpvDecorationSaturatedConversion:
1649 case SpvDecorationFuncParamAttr:
1650 case SpvDecorationFPRoundingMode:
1651 case SpvDecorationAlignment:
1652 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1653 spirv_decoration_to_string(dec->decoration));
1654 break;
1655
1656 case SpvDecorationFPFastMathMode:
1657 /* See handle_fp_fast_math(). */
1658 break;
1659
1660 case SpvDecorationUserTypeGOOGLE:
1661 /* User semantic decorations can safely be ignored by the driver. */
1662 break;
1663
1664 default:
1665 vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1666 }
1667 }
1668
1669 static unsigned
1670 translate_image_format(struct vtn_builder *b, SpvImageFormat format)
1671 {
1672 switch (format) {
1673 case SpvImageFormatUnknown: return PIPE_FORMAT_NONE;
1674 case SpvImageFormatRgba32f: return PIPE_FORMAT_R32G32B32A32_FLOAT;
1675 case SpvImageFormatRgba16f: return PIPE_FORMAT_R16G16B16A16_FLOAT;
1676 case SpvImageFormatR32f: return PIPE_FORMAT_R32_FLOAT;
1677 case SpvImageFormatRgba8: return PIPE_FORMAT_R8G8B8A8_UNORM;
1678 case SpvImageFormatRgba8Snorm: return PIPE_FORMAT_R8G8B8A8_SNORM;
1679 case SpvImageFormatRg32f: return PIPE_FORMAT_R32G32_FLOAT;
1680 case SpvImageFormatRg16f: return PIPE_FORMAT_R16G16_FLOAT;
1681 case SpvImageFormatR11fG11fB10f: return PIPE_FORMAT_R11G11B10_FLOAT;
1682 case SpvImageFormatR16f: return PIPE_FORMAT_R16_FLOAT;
1683 case SpvImageFormatRgba16: return PIPE_FORMAT_R16G16B16A16_UNORM;
1684 case SpvImageFormatRgb10A2: return PIPE_FORMAT_R10G10B10A2_UNORM;
1685 case SpvImageFormatRg16: return PIPE_FORMAT_R16G16_UNORM;
1686 case SpvImageFormatRg8: return PIPE_FORMAT_R8G8_UNORM;
1687 case SpvImageFormatR16: return PIPE_FORMAT_R16_UNORM;
1688 case SpvImageFormatR8: return PIPE_FORMAT_R8_UNORM;
1689 case SpvImageFormatRgba16Snorm: return PIPE_FORMAT_R16G16B16A16_SNORM;
1690 case SpvImageFormatRg16Snorm: return PIPE_FORMAT_R16G16_SNORM;
1691 case SpvImageFormatRg8Snorm: return PIPE_FORMAT_R8G8_SNORM;
1692 case SpvImageFormatR16Snorm: return PIPE_FORMAT_R16_SNORM;
1693 case SpvImageFormatR8Snorm: return PIPE_FORMAT_R8_SNORM;
1694 case SpvImageFormatRgba32i: return PIPE_FORMAT_R32G32B32A32_SINT;
1695 case SpvImageFormatRgba16i: return PIPE_FORMAT_R16G16B16A16_SINT;
1696 case SpvImageFormatRgba8i: return PIPE_FORMAT_R8G8B8A8_SINT;
1697 case SpvImageFormatR32i: return PIPE_FORMAT_R32_SINT;
1698 case SpvImageFormatRg32i: return PIPE_FORMAT_R32G32_SINT;
1699 case SpvImageFormatRg16i: return PIPE_FORMAT_R16G16_SINT;
1700 case SpvImageFormatRg8i: return PIPE_FORMAT_R8G8_SINT;
1701 case SpvImageFormatR16i: return PIPE_FORMAT_R16_SINT;
1702 case SpvImageFormatR8i: return PIPE_FORMAT_R8_SINT;
1703 case SpvImageFormatRgba32ui: return PIPE_FORMAT_R32G32B32A32_UINT;
1704 case SpvImageFormatRgba16ui: return PIPE_FORMAT_R16G16B16A16_UINT;
1705 case SpvImageFormatRgba8ui: return PIPE_FORMAT_R8G8B8A8_UINT;
1706 case SpvImageFormatR32ui: return PIPE_FORMAT_R32_UINT;
1707 case SpvImageFormatRgb10a2ui: return PIPE_FORMAT_R10G10B10A2_UINT;
1708 case SpvImageFormatRg32ui: return PIPE_FORMAT_R32G32_UINT;
1709 case SpvImageFormatRg16ui: return PIPE_FORMAT_R16G16_UINT;
1710 case SpvImageFormatRg8ui: return PIPE_FORMAT_R8G8_UINT;
1711 case SpvImageFormatR16ui: return PIPE_FORMAT_R16_UINT;
1712 case SpvImageFormatR8ui: return PIPE_FORMAT_R8_UINT;
1713 case SpvImageFormatR64ui: return PIPE_FORMAT_R64_UINT;
1714 case SpvImageFormatR64i: return PIPE_FORMAT_R64_SINT;
1715 default:
1716 vtn_fail("Invalid image format: %s (%u)",
1717 spirv_imageformat_to_string(format), format);
1718 }
1719 }
1720
1721 static void
1722 validate_image_type_for_sampled_image(struct vtn_builder *b,
1723 const struct glsl_type *image_type,
1724 const char *operand)
1725 {
1726 /* From OpTypeSampledImage description in SPIR-V 1.6, revision 1:
1727 *
1728 * Image Type must be an OpTypeImage. It is the type of the image in the
1729 * combined sampler and image type. It must not have a Dim of
1730 * SubpassData. Additionally, starting with version 1.6, it must not have
1731 * a Dim of Buffer.
1732 *
1733 * Same also applies to the type of the Image operand in OpSampledImage.
1734 */
1735
1736 const enum glsl_sampler_dim dim = glsl_get_sampler_dim(image_type);
1737
1738 vtn_fail_if(dim == GLSL_SAMPLER_DIM_SUBPASS ||
1739 dim == GLSL_SAMPLER_DIM_SUBPASS_MS,
1740 "%s must not have a Dim of SubpassData.", operand);
1741
1742 if (dim == GLSL_SAMPLER_DIM_BUF) {
1743 if (b->version >= 0x10600) {
1744 vtn_fail("Starting with SPIR-V 1.6, %s "
1745 "must not have a Dim of Buffer.", operand);
1746 } else {
1747 vtn_warn("%s should not have a Dim of Buffer.", operand);
1748 }
1749 }
1750 }
1751
1752 static void
1753 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
1754 const uint32_t *w, unsigned count)
1755 {
1756 struct vtn_value *val = NULL;
1757
1758 /* In order to properly handle forward declarations, we have to defer
1759 * allocation for pointer types.
1760 */
1761 if (opcode != SpvOpTypePointer && opcode != SpvOpTypeForwardPointer) {
1762 val = vtn_push_value(b, w[1], vtn_value_type_type);
1763 vtn_fail_if(val->type != NULL,
1764 "Only pointers can have forward declarations");
1765 val->type = vtn_zalloc(b, struct vtn_type);
1766 val->type->id = w[1];
1767 }
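/* For example (hypothetical ids), a self-referential struct can be declared
 * as:
 *
 *    OpTypeForwardPointer %ptr PhysicalStorageBuffer
 *    %node = OpTypeStruct %uint %ptr
 *    %ptr  = OpTypePointer PhysicalStorageBuffer %node
 *
 * so %ptr has to be usable by OpTypeStruct before the OpTypePointer that
 * actually defines it has been parsed.
 */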
1768
1769 switch (opcode) {
1770 case SpvOpTypeVoid:
1771 val->type->base_type = vtn_base_type_void;
1772 val->type->type = glsl_void_type();
1773 break;
1774 case SpvOpTypeBool:
1775 val->type->base_type = vtn_base_type_scalar;
1776 val->type->type = glsl_bool_type();
1777 val->type->length = 1;
1778 break;
1779 case SpvOpTypeInt: {
1780 int bit_size = w[2];
1781 const bool signedness = w[3];
1782 vtn_fail_if(bit_size != 8 && bit_size != 16 &&
1783 bit_size != 32 && bit_size != 64,
1784 "Invalid int bit size: %u", bit_size);
1785 val->type->base_type = vtn_base_type_scalar;
1786 val->type->type = signedness ? glsl_intN_t_type(bit_size) :
1787 glsl_uintN_t_type(bit_size);
1788 val->type->length = 1;
1789 break;
1790 }
1791
1792 case SpvOpTypeFloat: {
1793 int bit_size = w[2];
1794 val->type->base_type = vtn_base_type_scalar;
1795 vtn_fail_if(bit_size != 16 && bit_size != 32 && bit_size != 64,
1796 "Invalid float bit size: %u", bit_size);
1797 val->type->type = glsl_floatN_t_type(bit_size);
1798 val->type->length = 1;
1799 break;
1800 }
1801
1802 case SpvOpTypeVector: {
1803 struct vtn_type *base = vtn_get_type(b, w[2]);
1804 unsigned elems = w[3];
1805
1806 vtn_fail_if(base->base_type != vtn_base_type_scalar,
1807 "Base type for OpTypeVector must be a scalar");
1808 vtn_fail_if((elems < 2 || elems > 4) && (elems != 8) && (elems != 16),
1809 "Invalid component count for OpTypeVector");
1810
1811 val->type->base_type = vtn_base_type_vector;
1812 val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
1813 val->type->length = elems;
1814 val->type->stride = glsl_type_is_boolean(val->type->type)
1815 ? 4 : glsl_get_bit_size(base->type) / 8;
1816 val->type->array_element = base;
1817 break;
1818 }
1819
1820 case SpvOpTypeMatrix: {
1821 struct vtn_type *base = vtn_get_type(b, w[2]);
1822 unsigned columns = w[3];
1823
1824 vtn_fail_if(base->base_type != vtn_base_type_vector,
1825 "Base type for OpTypeMatrix must be a vector");
1826 vtn_fail_if(columns < 2 || columns > 4,
1827 "Invalid column count for OpTypeMatrix");
1828
1829 val->type->base_type = vtn_base_type_matrix;
1830 val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
1831 glsl_get_vector_elements(base->type),
1832 columns);
1833 vtn_fail_if(glsl_type_is_error(val->type->type),
1834 "Unsupported base type for OpTypeMatrix");
1835 assert(!glsl_type_is_error(val->type->type));
1836 val->type->length = columns;
1837 val->type->array_element = base;
1838 val->type->row_major = false;
1839 val->type->stride = 0;
1840 break;
1841 }
1842
1843 case SpvOpTypeRuntimeArray:
1844 case SpvOpTypeArray: {
1845 struct vtn_type *array_element = vtn_get_type(b, w[2]);
1846
1847 if (opcode == SpvOpTypeRuntimeArray) {
1848 /* A length of 0 is used to denote unsized arrays */
1849 val->type->length = 0;
1850 } else {
1851 val->type->length = vtn_constant_uint(b, w[3]);
1852 }
1853
1854 val->type->base_type = vtn_base_type_array;
1855 val->type->array_element = array_element;
1856
1857 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1858 val->type->type = glsl_array_type(array_element->type, val->type->length,
1859 val->type->stride);
1860 break;
1861 }
1862
1863 case SpvOpTypeStruct: {
1864 unsigned num_fields = count - 2;
1865 val->type->base_type = vtn_base_type_struct;
1866 val->type->length = num_fields;
1867 val->type->members = vtn_alloc_array(b, struct vtn_type *, num_fields);
1868 val->type->offsets = vtn_alloc_array(b, unsigned, num_fields);
1869 val->type->packed = false;
1870
1871 NIR_VLA(struct glsl_struct_field, fields, count);
1872 for (unsigned i = 0; i < num_fields; i++) {
1873 val->type->members[i] = vtn_get_type(b, w[i + 2]);
1874 const char *name = NULL;
1875 for (struct vtn_decoration *dec = val->decoration; dec; dec = dec->next) {
1876 if (dec->scope == VTN_DEC_STRUCT_MEMBER_NAME0 - i) {
1877 name = dec->member_name;
1878 break;
1879 }
1880 }
1881 if (!name)
1882 name = ralloc_asprintf(b, "field%d", i);
1883
1884 fields[i] = (struct glsl_struct_field) {
1885 .type = val->type->members[i]->type,
1886 .name = name,
1887 .location = -1,
1888 .offset = -1,
1889 };
1890 }
1891
1892 vtn_foreach_decoration(b, val, struct_packed_decoration_cb, NULL);
1893
1894 struct member_decoration_ctx ctx = {
1895 .num_fields = num_fields,
1896 .fields = fields,
1897 .type = val->type
1898 };
1899
1900 vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
1901
1902 /* Propagate access specifiers that are present on all members to the overall type */
1903 enum gl_access_qualifier overall_access = ACCESS_COHERENT | ACCESS_VOLATILE |
1904 ACCESS_NON_READABLE | ACCESS_NON_WRITEABLE;
1905 for (unsigned i = 0; i < num_fields; ++i)
1906 overall_access &= val->type->members[i]->access;
1907 val->type->access = overall_access;
1908
1909 vtn_foreach_decoration(b, val, struct_member_matrix_stride_cb, &ctx);
1910
1911 vtn_foreach_decoration(b, val, struct_block_decoration_cb, NULL);
1912
1913 const char *name = val->name;
1914
1915 if (val->type->block || val->type->buffer_block) {
1916 /* Packing will be ignored since types coming from SPIR-V are
1917 * explicitly laid out.
1918 */
1919 val->type->type = glsl_interface_type(fields, num_fields,
1920 /* packing */ 0, false,
1921 name ? name : "block");
1922 } else {
1923 val->type->type = glsl_struct_type(fields, num_fields,
1924 name ? name : "struct",
1925 val->type->packed);
1926 }
1927 break;
1928 }
1929
1930 case SpvOpTypeFunction: {
1931 val->type->base_type = vtn_base_type_function;
1932 val->type->type = NULL;
1933
1934 val->type->return_type = vtn_get_type(b, w[2]);
1935
1936 const unsigned num_params = count - 3;
1937 val->type->length = num_params;
1938 val->type->params = vtn_alloc_array(b, struct vtn_type *, num_params);
1939 for (unsigned i = 0; i < count - 3; i++) {
1940 val->type->params[i] = vtn_get_type(b, w[i + 3]);
1941 }
1942 break;
1943 }
1944
1945 case SpvOpTypePointer:
1946 case SpvOpTypeForwardPointer: {
1947 /* We can't blindly push the value because it might be a forward
1948 * declaration.
1949 */
1950 val = vtn_untyped_value(b, w[1]);
1951
1952 SpvStorageClass storage_class = w[2];
1953
1954 vtn_fail_if(opcode == SpvOpTypeForwardPointer &&
1955 b->shader->info.stage != MESA_SHADER_KERNEL &&
1956 storage_class != SpvStorageClassPhysicalStorageBuffer,
1957 "OpTypeForwardPointer is only allowed in Vulkan with "
1958 "the PhysicalStorageBuffer storage class");
1959
1960 struct vtn_type *pointed_type = NULL;
1961 if (opcode == SpvOpTypePointer)
1962 pointed_type = vtn_get_type(b, w[3]);
1963
1964 bool has_forward_pointer = false;
1965 if (val->value_type == vtn_value_type_invalid) {
1966 val->value_type = vtn_value_type_type;
1967 val->type = vtn_zalloc(b, struct vtn_type);
1968 val->type->id = w[1];
1969 val->type->base_type = vtn_base_type_pointer;
1970 val->type->storage_class = storage_class;
1971
1972 /* These can actually be stored to nir_variables and used as SSA
1973 * values so they need a real glsl_type.
1974 */
1975 enum vtn_variable_mode mode = vtn_storage_class_to_mode(
1976 b, storage_class, pointed_type, NULL);
1977
1978 /* The deref type should only matter for the UniformConstant storage
1979 * class. In particular, it should never matter for any storage
1980 * classes that are allowed in combination with OpTypeForwardPointer.
1981 */
1982 if (storage_class != SpvStorageClassUniform &&
1983 storage_class != SpvStorageClassUniformConstant) {
1984 assert(mode == vtn_storage_class_to_mode(b, storage_class,
1985 NULL, NULL));
1986 }
1987
1988 val->type->type = nir_address_format_to_glsl_type(
1989 vtn_mode_to_address_format(b, mode));
1990 } else {
1991 vtn_fail_if(val->type->storage_class != storage_class,
1992 "The storage classes of an OpTypePointer and any "
1993 "OpTypeForwardPointers that provide forward "
1994 "declarations of it must match.");
1995 has_forward_pointer = true;
1996 }
1997
1998 if (opcode == SpvOpTypePointer) {
1999 vtn_fail_if(val->type->pointed != NULL,
2000 "While OpTypeForwardPointer can be used to provide a "
2001 "forward declaration of a pointer, OpTypePointer can "
2002 "only be used once for a given id.");
2003
2004 vtn_fail_if(has_forward_pointer &&
2005 pointed_type->base_type != vtn_base_type_struct,
2006 "An OpTypePointer instruction must declare "
2007 "Pointer Type to be a pointer to an OpTypeStruct.");
2008
2009 val->type->pointed = pointed_type;
2010
2011 /* Only certain storage classes use ArrayStride. */
2012 switch (storage_class) {
2013 case SpvStorageClassWorkgroup:
2014 if (!b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR)
2015 break;
2016 FALLTHROUGH;
2017
2018 case SpvStorageClassUniform:
2019 case SpvStorageClassPushConstant:
2020 case SpvStorageClassStorageBuffer:
2021 case SpvStorageClassPhysicalStorageBuffer:
2022 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
2023 break;
2024
2025 default:
2026 /* Nothing to do. */
2027 break;
2028 }
2029 }
2030 break;
2031 }
2032
2033 case SpvOpTypeImage: {
2034 val->type->base_type = vtn_base_type_image;
2035
2036 /* Images are represented in NIR as a scalar SSA value that is the
2037 * result of a deref instruction. An OpLoad on an OpTypeImage pointer
2038 * from UniformConstant memory just takes the NIR deref from the pointer
2039 * and turns it into an SSA value.
2040 */
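/* Illustrative sketch (not literal NIR syntax): loading a hypothetical
 * UniformConstant image variable "tex" conceptually produces
 *
 *    img = deref_var &tex
 *
 * and that deref value is what later image/texture instructions consume.
 */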
2041 val->type->type = nir_address_format_to_glsl_type(
2042 vtn_mode_to_address_format(b, vtn_variable_mode_function));
2043
2044 const struct vtn_type *sampled_type = vtn_get_type(b, w[2]);
2045 if (b->shader->info.stage == MESA_SHADER_KERNEL) {
2046 vtn_fail_if(sampled_type->base_type != vtn_base_type_void,
2047 "Sampled type of OpTypeImage must be void for kernels");
2048 } else {
2049 vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar,
2050 "Sampled type of OpTypeImage must be a scalar");
2051 if (b->supported_capabilities.Int64ImageEXT) {
2052 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32 &&
2053 glsl_get_bit_size(sampled_type->type) != 64,
2054 "Sampled type of OpTypeImage must be a 32 or 64-bit "
2055 "scalar");
2056 } else {
2057 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32,
2058 "Sampled type of OpTypeImage must be a 32-bit scalar");
2059 }
2060 }
2061
2062 enum glsl_sampler_dim dim;
2063 switch ((SpvDim)w[3]) {
2064 case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break;
2065 case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break;
2066 case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break;
2067 case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break;
2068 case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break;
2069 case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break;
2070 case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break;
2071 default:
2072 vtn_fail("Invalid SPIR-V image dimensionality: %s (%u)",
2073 spirv_dim_to_string((SpvDim)w[3]), w[3]);
2074 }
2075
2076 /* w[4]: as per Vulkan spec "Validation Rules within a Module",
2077 * The “Depth” operand of OpTypeImage is ignored.
2078 */
2079 bool is_array = w[5];
2080 bool multisampled = w[6];
2081 unsigned sampled = w[7];
2082 SpvImageFormat format = w[8];
2083
2084 if (count > 9)
2085 val->type->access_qualifier = w[9];
2086 else if (b->shader->info.stage == MESA_SHADER_KERNEL)
2087 /* Per the CL C spec: If no qualifier is provided, read_only is assumed. */
2088 val->type->access_qualifier = SpvAccessQualifierReadOnly;
2089 else
2090 val->type->access_qualifier = SpvAccessQualifierReadWrite;
2091
2092 if (multisampled) {
2093 if (dim == GLSL_SAMPLER_DIM_2D)
2094 dim = GLSL_SAMPLER_DIM_MS;
2095 else if (dim == GLSL_SAMPLER_DIM_SUBPASS)
2096 dim = GLSL_SAMPLER_DIM_SUBPASS_MS;
2097 else
2098 vtn_fail("Unsupported multisampled image type");
2099 }
2100
2101 val->type->image_format = translate_image_format(b, format);
2102
2103 enum glsl_base_type sampled_base_type =
2104 glsl_get_base_type(sampled_type->type);
2105 if (sampled == 1) {
2106 val->type->glsl_image = glsl_texture_type(dim, is_array,
2107 sampled_base_type);
2108 } else if (sampled == 2) {
2109 val->type->glsl_image = glsl_image_type(dim, is_array,
2110 sampled_base_type);
2111 } else if (b->shader->info.stage == MESA_SHADER_KERNEL) {
2112 val->type->glsl_image = glsl_image_type(dim, is_array,
2113 GLSL_TYPE_VOID);
2114 } else {
2115 vtn_fail("We need to know if the image will be sampled");
2116 }
2117 break;
2118 }
2119
2120 case SpvOpTypeSampledImage: {
2121 val->type->base_type = vtn_base_type_sampled_image;
2122 val->type->image = vtn_get_type(b, w[2]);
2123
2124 validate_image_type_for_sampled_image(
2125 b, val->type->image->glsl_image,
2126 "Image Type operand of OpTypeSampledImage");
2127
2128 /* Sampled images are represented in NIR as a vec2 SSA value where each
2129 * component is the result of a deref instruction. The first component
2130 * is the image and the second is the sampler. An OpLoad on an
2131 * OpTypeSampledImage pointer from UniformConstant memory just takes
2132 * the NIR deref from the pointer and duplicates it to both vector
2133 * components.
2134 */
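/* Conceptually the value is vec2(&image_deref, &sampler_deref): OpImage
 * recovers the image from component 0, and OpSampledImage packs the two
 * derefs back into a single vec2 value.
 */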
2135 nir_address_format addr_format =
2136 vtn_mode_to_address_format(b, vtn_variable_mode_function);
2137 assert(nir_address_format_num_components(addr_format) == 1);
2138 unsigned bit_size = nir_address_format_bit_size(addr_format);
2139 assert(bit_size == 32 || bit_size == 64);
2140
2141 enum glsl_base_type base_type =
2142 bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64;
2143 val->type->type = glsl_vector_type(base_type, 2);
2144 break;
2145 }
2146
2147 case SpvOpTypeSampler:
2148 val->type->base_type = vtn_base_type_sampler;
2149
2150 /* Samplers are represented in NIR as a scalar SSA value that is the
2151 * result of a deref instruction. An OpLoad on an OpTypeSampler pointer
2152 * from UniformConstant memory just takes the NIR deref from the pointer
2153 * and turns it into an SSA value.
2154 */
2155 val->type->type = nir_address_format_to_glsl_type(
2156 vtn_mode_to_address_format(b, vtn_variable_mode_function));
2157 break;
2158
2159 case SpvOpTypeAccelerationStructureKHR:
2160 val->type->base_type = vtn_base_type_accel_struct;
2161 val->type->type = glsl_uint64_t_type();
2162 break;
2163
2164
2165 case SpvOpTypeOpaque: {
2166 val->type->base_type = vtn_base_type_struct;
2167 const char *name = vtn_string_literal(b, &w[2], count - 2, NULL);
2168 val->type->type = glsl_struct_type(NULL, 0, name, false);
2169 break;
2170 }
2171
2172 case SpvOpTypeRayQueryKHR: {
2173 val->type->base_type = vtn_base_type_ray_query;
2174 val->type->type = glsl_uint64_t_type();
2175 /* We may need to run queries on helper invocations. Here the parser
2176 * doesn't go through a deeper analysis on whether the result of a query
2177 * will be used in derivative instructions.
2178 *
2179 * An implementation willing to optimize this would look through the IR
2180 * and check if any derivative instruction uses the result of a query
2181 * and drop this flag if not.
2182 */
2183 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
2184 val->type->access = ACCESS_INCLUDE_HELPERS;
2185 break;
2186 }
2187
2188 case SpvOpTypeCooperativeMatrixKHR:
2189 vtn_handle_cooperative_type(b, val, opcode, w, count);
2190 break;
2191
2192 case SpvOpTypeEvent:
2193 val->type->base_type = vtn_base_type_event;
2194 /*
2195 * This makes the event type match the pointer size, which is needed due to LLVM 16.
2196 * LLVM 17 fixes this properly, but with 16 and opaque pointers it is still wrong.
2197 */
2198 val->type->type = b->shader->info.cs.ptr_size == 64 ? glsl_int64_t_type() : glsl_int_type();
2199 break;
2200
2201 case SpvOpTypeDeviceEvent:
2202 case SpvOpTypeReserveId:
2203 case SpvOpTypeQueue:
2204 case SpvOpTypePipe:
2205 default:
2206 vtn_fail_with_opcode("Unhandled opcode", opcode);
2207 }
2208
2209 vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
2210
2211 if (val->type->base_type == vtn_base_type_struct &&
2212 (val->type->block || val->type->buffer_block)) {
2213 for (unsigned i = 0; i < val->type->length; i++) {
2214 vtn_fail_if(vtn_type_contains_block(b, val->type->members[i]),
2215 "Block and BufferBlock decorations cannot decorate a "
2216 "structure type that is nested at any level inside "
2217 "another structure type decorated with Block or "
2218 "BufferBlock.");
2219 }
2220 }
2221 }
2222
2223 static nir_constant *
2224 vtn_null_constant(struct vtn_builder *b, struct vtn_type *type)
2225 {
2226 nir_constant *c = rzalloc(b, nir_constant);
2227
2228 switch (type->base_type) {
2229 case vtn_base_type_scalar:
2230 case vtn_base_type_vector:
2231 c->is_null_constant = true;
2232 /* Nothing to do here. It's already initialized to zero */
2233 break;
2234
2235 case vtn_base_type_pointer: {
2236 enum vtn_variable_mode mode = vtn_storage_class_to_mode(
2237 b, type->storage_class, type->pointed, NULL);
2238 nir_address_format addr_format = vtn_mode_to_address_format(b, mode);
2239
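/* The null pointer representation depends on the address format and is not
 * necessarily all-zeros, so copy it explicitly instead of relying on
 * rzalloc's zero fill.
 */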
2240 const nir_const_value *null_value = nir_address_format_null_value(addr_format);
2241 memcpy(c->values, null_value,
2242 sizeof(nir_const_value) * nir_address_format_num_components(addr_format));
2243 break;
2244 }
2245
2246 case vtn_base_type_void:
2247 case vtn_base_type_image:
2248 case vtn_base_type_sampler:
2249 case vtn_base_type_sampled_image:
2250 case vtn_base_type_function:
2251 case vtn_base_type_event:
2252 /* For those we have to return something but it doesn't matter what. */
2253 break;
2254
2255 case vtn_base_type_matrix:
2256 case vtn_base_type_array:
2257 vtn_assert(type->length > 0);
2258 c->is_null_constant = true;
2259 c->num_elements = type->length;
2260 c->elements = ralloc_array(b, nir_constant *, c->num_elements);
2261
2262 c->elements[0] = vtn_null_constant(b, type->array_element);
2263 for (unsigned i = 1; i < c->num_elements; i++)
2264 c->elements[i] = c->elements[0];
2265 break;
2266
2267 case vtn_base_type_struct:
2268 c->is_null_constant = true;
2269 c->num_elements = type->length;
2270 c->elements = ralloc_array(b, nir_constant *, c->num_elements);
2271 for (unsigned i = 0; i < c->num_elements; i++)
2272 c->elements[i] = vtn_null_constant(b, type->members[i]);
2273 break;
2274
2275 default:
2276 vtn_fail("Invalid type for null constant");
2277 }
2278
2279 return c;
2280 }
2281
2282 static void
2283 spec_constant_decoration_cb(struct vtn_builder *b, UNUSED struct vtn_value *val,
2284 ASSERTED int member,
2285 const struct vtn_decoration *dec, void *data)
2286 {
2287 vtn_assert(member == -1);
2288 if (dec->decoration != SpvDecorationSpecId)
2289 return;
2290
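/* For example (hypothetical ids), a module containing
 *
 *    OpDecorate %c SpecId 7
 *
 * has its default value for %c overridden here if the caller supplied a
 * specialization entry with id == 7.
 */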
2291 nir_const_value *value = data;
2292 for (unsigned i = 0; i < b->num_specializations; i++) {
2293 if (b->specializations[i].id == dec->operands[0]) {
2294 *value = b->specializations[i].value;
2295 return;
2296 }
2297 }
2298 }
2299
2300 static void
2301 handle_workgroup_size_decoration_cb(struct vtn_builder *b,
2302 struct vtn_value *val,
2303 ASSERTED int member,
2304 const struct vtn_decoration *dec,
2305 UNUSED void *data)
2306 {
2307 vtn_assert(member == -1);
2308 if (dec->decoration != SpvDecorationBuiltIn ||
2309 dec->operands[0] != SpvBuiltInWorkgroupSize)
2310 return;
2311
2312 vtn_assert(val->type->type == glsl_vector_type(GLSL_TYPE_UINT, 3));
2313 b->workgroup_size_builtin = val;
2314 }
2315
2316 static void
2317 vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
2318 const uint32_t *w, unsigned count)
2319 {
2320 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
2321 val->constant = rzalloc(b, nir_constant);
2322 switch (opcode) {
2323 case SpvOpConstantTrue:
2324 case SpvOpConstantFalse:
2325 case SpvOpSpecConstantTrue:
2326 case SpvOpSpecConstantFalse: {
2327 vtn_fail_if(val->type->type != glsl_bool_type(),
2328 "Result type of %s must be OpTypeBool",
2329 spirv_op_to_string(opcode));
2330
2331 bool bval = (opcode == SpvOpConstantTrue ||
2332 opcode == SpvOpSpecConstantTrue);
2333
2334 nir_const_value u32val = nir_const_value_for_uint(bval, 32);
2335
2336 if (opcode == SpvOpSpecConstantTrue ||
2337 opcode == SpvOpSpecConstantFalse)
2338 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32val);
2339
2340 val->constant->values[0].b = u32val.u32 != 0;
2341 break;
2342 }
2343
2344 case SpvOpConstant:
2345 case SpvOpSpecConstant: {
2346 vtn_fail_if(val->type->base_type != vtn_base_type_scalar,
2347 "Result type of %s must be a scalar",
2348 spirv_op_to_string(opcode));
2349 int bit_size = glsl_get_bit_size(val->type->type);
2350 switch (bit_size) {
2351 case 64:
2352 val->constant->values[0].u64 = vtn_u64_literal(&w[3]);
2353 break;
2354 case 32:
2355 val->constant->values[0].u32 = w[3];
2356 break;
2357 case 16:
2358 val->constant->values[0].u16 = w[3];
2359 break;
2360 case 8:
2361 val->constant->values[0].u8 = w[3];
2362 break;
2363 default:
2364 vtn_fail("Unsupported SpvOpConstant bit size: %u", bit_size);
2365 }
2366
2367 if (opcode == SpvOpSpecConstant)
2368 vtn_foreach_decoration(b, val, spec_constant_decoration_cb,
2369 &val->constant->values[0]);
2370 break;
2371 }
2372
2373 case SpvOpSpecConstantComposite:
2374 case SpvOpConstantComposite:
2375 case SpvOpConstantCompositeReplicateEXT:
2376 case SpvOpSpecConstantCompositeReplicateEXT: {
2377 const unsigned elem_count =
2378 val->type->base_type == vtn_base_type_cooperative_matrix ?
2379 1 : val->type->length;
2380
2381 nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
2382 if (opcode == SpvOpConstantCompositeReplicateEXT ||
2383 opcode == SpvOpSpecConstantCompositeReplicateEXT) {
2384 struct vtn_value *elem_val = vtn_untyped_value(b, w[3]);
2385
2386 if (elem_val->value_type == vtn_value_type_constant) {
2387 elems[0] = elem_val->constant;
2388 val->is_undef_constant = false;
2389 } else {
2390 vtn_fail_if(elem_val->value_type != vtn_value_type_undef,
2391 "only constants or undefs allowed for %s",
2392 spirv_op_to_string(opcode));
2393 /* to make it easier, just insert a NULL constant for now */
2394 elems[0] = vtn_null_constant(b, elem_val->type);
2395 val->is_undef_constant = true;
2396 }
2397
2398 for (unsigned i = 1; i < elem_count; i++)
2399 elems[i] = elems[0];
2400 } else {
2401 vtn_fail_if(elem_count != count - 3,
2402 "%s has %u constituents, expected %u",
2403 spirv_op_to_string(opcode), count - 3, elem_count);
2404
2405 val->is_undef_constant = true;
2406 for (unsigned i = 0; i < elem_count; i++) {
2407 struct vtn_value *elem_val = vtn_untyped_value(b, w[i + 3]);
2408
2409 if (elem_val->value_type == vtn_value_type_constant) {
2410 elems[i] = elem_val->constant;
2411 val->is_undef_constant = val->is_undef_constant &&
2412 elem_val->is_undef_constant;
2413 } else {
2414 vtn_fail_if(elem_val->value_type != vtn_value_type_undef,
2415 "only constants or undefs allowed for %s",
2416 spirv_op_to_string(opcode));
2417 /* to make it easier, just insert a NULL constant for now */
2418 elems[i] = vtn_null_constant(b, elem_val->type);
2419 }
2420 }
2421 }
2422
2423 switch (val->type->base_type) {
2424 case vtn_base_type_vector: {
2425 assert(glsl_type_is_vector(val->type->type));
2426 for (unsigned i = 0; i < elem_count; i++)
2427 val->constant->values[i] = elems[i]->values[0];
2428 break;
2429 }
2430
2431 case vtn_base_type_matrix:
2432 case vtn_base_type_struct:
2433 case vtn_base_type_array:
2434 ralloc_steal(val->constant, elems);
2435 val->constant->num_elements = elem_count;
2436 val->constant->elements = elems;
2437 break;
2438
2439 case vtn_base_type_cooperative_matrix:
2440 val->constant->values[0] = elems[0]->values[0];
2441 break;
2442
2443 default:
2444 vtn_fail("Result type of %s must be a composite type",
2445 spirv_op_to_string(opcode));
2446 }
2447 break;
2448 }
2449
2450 case SpvOpSpecConstantOp: {
2451 nir_const_value u32op = nir_const_value_for_uint(w[3], 32);
2452 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32op);
2453 SpvOp opcode = u32op.u32;
2454 switch (opcode) {
2455 case SpvOpVectorShuffle: {
2456 struct vtn_value *v0 = &b->values[w[4]];
2457 struct vtn_value *v1 = &b->values[w[5]];
2458
2459 vtn_assert(v0->value_type == vtn_value_type_constant ||
2460 v0->value_type == vtn_value_type_undef);
2461 vtn_assert(v1->value_type == vtn_value_type_constant ||
2462 v1->value_type == vtn_value_type_undef);
2463
2464 unsigned len0 = glsl_get_vector_elements(v0->type->type);
2465 unsigned len1 = glsl_get_vector_elements(v1->type->type);
2466
2467 vtn_assert(len0 + len1 < 16);
2468
2469 unsigned bit_size = glsl_get_bit_size(val->type->type);
2470 unsigned bit_size0 = glsl_get_bit_size(v0->type->type);
2471 unsigned bit_size1 = glsl_get_bit_size(v1->type->type);
2472
2473 vtn_assert(bit_size == bit_size0 && bit_size == bit_size1);
2474 (void)bit_size0; (void)bit_size1;
2475
2476 nir_const_value undef = { .u64 = 0xdeadbeefdeadbeef };
2477 nir_const_value combined[NIR_MAX_VEC_COMPONENTS * 2];
2478
2479 if (v0->value_type == vtn_value_type_constant) {
2480 for (unsigned i = 0; i < len0; i++)
2481 combined[i] = v0->constant->values[i];
2482 }
2483 if (v1->value_type == vtn_value_type_constant) {
2484 for (unsigned i = 0; i < len1; i++)
2485 combined[len0 + i] = v1->constant->values[i];
2486 }
2487
2488 for (unsigned i = 0, j = 0; i < count - 6; i++, j++) {
2489 uint32_t comp = w[i + 6];
2490 if (comp == (uint32_t)-1) {
2491 /* If component is not used, set the value to a known constant
2492 * to detect if it is wrongly used.
2493 */
2494 val->constant->values[j] = undef;
2495 } else {
2496 vtn_fail_if(comp >= len0 + len1,
2497 "All Component literals must either be FFFFFFFF "
2498 "or in [0, N - 1] (inclusive).");
2499 val->constant->values[j] = combined[comp];
2500 }
2501 }
2502 break;
2503 }
2504
2505 case SpvOpCompositeExtract:
2506 case SpvOpCompositeInsert: {
2507 struct vtn_value *comp;
2508 unsigned deref_start;
2509 struct nir_constant **c;
2510 if (opcode == SpvOpCompositeExtract) {
2511 comp = vtn_value(b, w[4], vtn_value_type_constant);
2512 deref_start = 5;
2513 c = &comp->constant;
2514 } else {
2515 comp = vtn_value(b, w[5], vtn_value_type_constant);
2516 deref_start = 6;
2517 val->constant = nir_constant_clone(comp->constant,
2518 (nir_variable *)b);
2519 c = &val->constant;
2520 }
2521
2522 int elem = -1;
2523 const struct vtn_type *type = comp->type;
2524 for (unsigned i = deref_start; i < count; i++) {
2525 if (type->base_type == vtn_base_type_cooperative_matrix) {
2526 /* Cooperative matrices are always scalar constants. We don't
2527 * care about the index w[i] because it's always replicated.
2528 */
2529 type = type->component_type;
2530 } else {
2531 vtn_fail_if(w[i] > type->length,
2532 "%uth index of %s is %u but the type has only "
2533 "%u elements", i - deref_start,
2534 spirv_op_to_string(opcode), w[i], type->length);
2535
2536 switch (type->base_type) {
2537 case vtn_base_type_vector:
2538 elem = w[i];
2539 type = type->array_element;
2540 break;
2541
2542 case vtn_base_type_matrix:
2543 case vtn_base_type_array:
2544 c = &(*c)->elements[w[i]];
2545 type = type->array_element;
2546 break;
2547
2548 case vtn_base_type_struct:
2549 c = &(*c)->elements[w[i]];
2550 type = type->members[w[i]];
2551 break;
2552
2553 default:
2554 vtn_fail("%s must only index into composite types",
2555 spirv_op_to_string(opcode));
2556 }
2557 }
2558 }
2559
2560 if (opcode == SpvOpCompositeExtract) {
2561 if (elem == -1) {
2562 val->constant = *c;
2563 } else {
2564 unsigned num_components = type->length;
2565 for (unsigned i = 0; i < num_components; i++)
2566 val->constant->values[i] = (*c)->values[elem + i];
2567 }
2568 } else {
2569 struct vtn_value *insert =
2570 vtn_value(b, w[4], vtn_value_type_constant);
2571 vtn_assert(insert->type == type);
2572 if (elem == -1) {
2573 *c = insert->constant;
2574 } else {
2575 unsigned num_components = type->length;
2576 for (unsigned i = 0; i < num_components; i++)
2577 (*c)->values[elem + i] = insert->constant->values[i];
2578 }
2579 }
2580 break;
2581 }
2582
2583 default: {
2584 bool swap;
2585 nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(val->type->type);
2586 nir_alu_type src_alu_type = dst_alu_type;
2587 unsigned num_components = glsl_get_vector_elements(val->type->type);
2588 unsigned bit_size;
2589
2590 vtn_assert(count <= 7);
2591
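/* For example, OpSpecConstantOp IAdd %a %b (hypothetical ids) lands here:
 * the SPIR-V opcode is mapped to nir_op_iadd and the constant operands are
 * folded by nir_eval_const_opcode() below.
 */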
2592 switch (opcode) {
2593 case SpvOpSConvert:
2594 case SpvOpFConvert:
2595 case SpvOpUConvert:
2596 /* We have a source in a conversion */
2597 src_alu_type =
2598 nir_get_nir_type_for_glsl_type(vtn_get_value_type(b, w[4])->type);
2599 /* We use the bit size of the conversion source to evaluate the opcode later */
2600 bit_size = glsl_get_bit_size(vtn_get_value_type(b, w[4])->type);
2601 break;
2602 default:
2603 bit_size = glsl_get_bit_size(val->type->type);
2604 }
2605
2606 bool exact;
2607 nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact,
2608 nir_alu_type_get_type_size(src_alu_type),
2609 nir_alu_type_get_type_size(dst_alu_type));
2610
2611 /* No SPIR-V opcodes handled through this path should set exact.
2612 * Since it is ignored, assert on it.
2613 */
2614 assert(!exact);
2615
2616 nir_const_value src[3][NIR_MAX_VEC_COMPONENTS];
2617
2618 for (unsigned i = 0; i < count - 4; i++) {
2619 struct vtn_value *src_val =
2620 vtn_value(b, w[4 + i], vtn_value_type_constant);
2621
2622 /* If this is an unsized source, pull the bit size from the
2623 * source; otherwise, we'll use the bit size from the destination.
2624 */
2625 if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]))
2626 bit_size = glsl_get_bit_size(src_val->type->type);
2627
2628 unsigned src_comps = nir_op_infos[op].input_sizes[i] ?
2629 nir_op_infos[op].input_sizes[i] :
2630 num_components;
2631
2632 unsigned j = swap ? 1 - i : i;
2633 for (unsigned c = 0; c < src_comps; c++)
2634 src[j][c] = src_val->constant->values[c];
2635 }
2636
2637 /* Fix up fixed-size sources: NIR shift opcodes always take a 32-bit shift count, so narrow src[1] when the value bit size is not 32. */
2638 switch (op) {
2639 case nir_op_ishl:
2640 case nir_op_ishr:
2641 case nir_op_ushr: {
2642 if (bit_size == 32)
2643 break;
2644 for (unsigned i = 0; i < num_components; ++i) {
2645 switch (bit_size) {
2646 case 64: src[1][i].u32 = src[1][i].u64; break;
2647 case 16: src[1][i].u32 = src[1][i].u16; break;
2648 case 8: src[1][i].u32 = src[1][i].u8; break;
2649 }
2650 }
2651 break;
2652 }
2653 default:
2654 break;
2655 }
2656
2657 nir_const_value *srcs[3] = {
2658 src[0], src[1], src[2],
2659 };
2660 nir_eval_const_opcode(op, val->constant->values,
2661 num_components, bit_size, srcs,
2662 b->shader->info.float_controls_execution_mode);
2663 break;
2664 } /* default */
2665 }
2666 break;
2667 }
2668
2669 case SpvOpConstantNull:
2670 val->constant = vtn_null_constant(b, val->type);
2671 val->is_null_constant = true;
2672 break;
2673
2674 default:
2675 vtn_fail_with_opcode("Unhandled opcode", opcode);
2676 }
2677
2678 /* Now that we have the value, update the workgroup size if needed */
2679 if (gl_shader_stage_uses_workgroup(b->entry_point_stage))
2680 vtn_foreach_decoration(b, val, handle_workgroup_size_decoration_cb,
2681 NULL);
2682 }
2683
2684 static void
2685 vtn_split_barrier_semantics(struct vtn_builder *b,
2686 SpvMemorySemanticsMask semantics,
2687 SpvMemorySemanticsMask *before,
2688 SpvMemorySemanticsMask *after)
2689 {
2690 /* For memory semantics embedded in operations, we split them into up to
2691 * two barriers, to be added before and after the operation. This is less
2692 * strict than if we propagated them all the way to the final backend
2693 * stage, but it still results in correct execution.
2694 *
2695 * A further improvement would be to pipe this information (and use it!)
2696 * into the next compiler layers, at the expense of making the handling
2697 * of barriers more complicated.
2698 */
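/* For example, an atomic carrying AcquireRelease | UniformMemory semantics
 * is conceptually treated as:
 *
 *    barrier(Release | UniformMemory);   <- *before*
 *    ... the operation itself ...
 *    barrier(Acquire | UniformMemory);   <- *after*
 */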
2699
2700 *before = SpvMemorySemanticsMaskNone;
2701 *after = SpvMemorySemanticsMaskNone;
2702
2703 SpvMemorySemanticsMask order_semantics =
2704 semantics & (SpvMemorySemanticsAcquireMask |
2705 SpvMemorySemanticsReleaseMask |
2706 SpvMemorySemanticsAcquireReleaseMask |
2707 SpvMemorySemanticsSequentiallyConsistentMask);
2708
2709 if (util_bitcount(order_semantics) > 1) {
2710 /* Old GLSLang versions incorrectly set all the ordering bits. This was
2711 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2712 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2713 */
2714 vtn_warn("Multiple memory ordering semantics specified, "
2715 "assuming AcquireRelease.");
2716 order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2717 }
2718
2719 const SpvMemorySemanticsMask av_vis_semantics =
2720 semantics & (SpvMemorySemanticsMakeAvailableMask |
2721 SpvMemorySemanticsMakeVisibleMask);
2722
2723 const SpvMemorySemanticsMask storage_semantics =
2724 semantics & (SpvMemorySemanticsUniformMemoryMask |
2725 SpvMemorySemanticsSubgroupMemoryMask |
2726 SpvMemorySemanticsWorkgroupMemoryMask |
2727 SpvMemorySemanticsCrossWorkgroupMemoryMask |
2728 SpvMemorySemanticsAtomicCounterMemoryMask |
2729 SpvMemorySemanticsImageMemoryMask |
2730 SpvMemorySemanticsOutputMemoryMask);
2731
2732 const SpvMemorySemanticsMask other_semantics =
2733 semantics & ~(order_semantics | av_vis_semantics | storage_semantics |
2734 SpvMemorySemanticsVolatileMask);
2735
2736 if (other_semantics)
2737 vtn_warn("Ignoring unhandled memory semantics: %u\n", other_semantics);
2738
2739 /* SequentiallyConsistent is treated as AcquireRelease. */
2740
2741 /* The RELEASE barrier happens BEFORE the operation, and it is usually
2742 * associated with a Store. All the write operations with a matching
2743 * semantics will not be reordered after the Store.
2744 */
2745 if (order_semantics & (SpvMemorySemanticsReleaseMask |
2746 SpvMemorySemanticsAcquireReleaseMask |
2747 SpvMemorySemanticsSequentiallyConsistentMask)) {
2748 *before |= SpvMemorySemanticsReleaseMask | storage_semantics;
2749 }
2750
2751 /* The ACQUIRE barrier happens AFTER the operation, and it is usually
2752 * associated with a Load. All the operations with a matching semantics
2753 * will not be reordered before the Load.
2754 */
2755 if (order_semantics & (SpvMemorySemanticsAcquireMask |
2756 SpvMemorySemanticsAcquireReleaseMask |
2757 SpvMemorySemanticsSequentiallyConsistentMask)) {
2758 *after |= SpvMemorySemanticsAcquireMask | storage_semantics;
2759 }
2760
2761 if (av_vis_semantics & SpvMemorySemanticsMakeVisibleMask)
2762 *before |= SpvMemorySemanticsMakeVisibleMask | storage_semantics;
2763
2764 if (av_vis_semantics & SpvMemorySemanticsMakeAvailableMask)
2765 *after |= SpvMemorySemanticsMakeAvailableMask | storage_semantics;
2766 }
2767
2768 static nir_memory_semantics
2769 vtn_mem_semantics_to_nir_mem_semantics(struct vtn_builder *b,
2770 SpvMemorySemanticsMask semantics)
2771 {
2772 nir_memory_semantics nir_semantics = 0;
2773
2774 SpvMemorySemanticsMask order_semantics =
2775 semantics & (SpvMemorySemanticsAcquireMask |
2776 SpvMemorySemanticsReleaseMask |
2777 SpvMemorySemanticsAcquireReleaseMask |
2778 SpvMemorySemanticsSequentiallyConsistentMask);
2779
2780 if (util_bitcount(order_semantics) > 1) {
2781 /* Old GLSLang versions incorrectly set all the ordering bits. This was
2782 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2783 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2784 */
2785 vtn_warn("Multiple memory ordering semantics bits specified, "
2786 "assuming AcquireRelease.");
2787 order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2788 }
2789
2790 switch (order_semantics) {
2791 case 0:
2792 /* Not an ordering barrier. */
2793 break;
2794
2795 case SpvMemorySemanticsAcquireMask:
2796 nir_semantics = NIR_MEMORY_ACQUIRE;
2797 break;
2798
2799 case SpvMemorySemanticsReleaseMask:
2800 nir_semantics = NIR_MEMORY_RELEASE;
2801 break;
2802
2803 case SpvMemorySemanticsSequentiallyConsistentMask:
2804 FALLTHROUGH; /* Treated as AcquireRelease in Vulkan. */
2805 case SpvMemorySemanticsAcquireReleaseMask:
2806 nir_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE;
2807 break;
2808
2809 default:
2810 unreachable("Invalid memory order semantics");
2811 }
2812
2813 if (semantics & SpvMemorySemanticsMakeAvailableMask) {
2814 vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
2815 "To use MakeAvailable memory semantics the VulkanMemoryModel "
2816 "capability must be declared.");
2817 nir_semantics |= NIR_MEMORY_MAKE_AVAILABLE;
2818 }
2819
2820 if (semantics & SpvMemorySemanticsMakeVisibleMask) {
2821 vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
2822 "To use MakeVisible memory semantics the VulkanMemoryModel "
2823 "capability must be declared.");
2824 nir_semantics |= NIR_MEMORY_MAKE_VISIBLE;
2825 }
2826
2827 return nir_semantics;
2828 }
2829
2830 static nir_variable_mode
2831 vtn_mem_semantics_to_nir_var_modes(struct vtn_builder *b,
2832 SpvMemorySemanticsMask semantics)
2833 {
2834 /* Vulkan Environment for SPIR-V says "SubgroupMemory, CrossWorkgroupMemory,
2835 * and AtomicCounterMemory are ignored".
2836 */
2837 if (b->options->environment == NIR_SPIRV_VULKAN) {
2838 semantics &= ~(SpvMemorySemanticsSubgroupMemoryMask |
2839 SpvMemorySemanticsCrossWorkgroupMemoryMask |
2840 SpvMemorySemanticsAtomicCounterMemoryMask);
2841 }
2842
2843 nir_variable_mode modes = 0;
2844 if (semantics & SpvMemorySemanticsUniformMemoryMask)
2845 modes |= nir_var_mem_ssbo | nir_var_mem_global;
2846 if (semantics & SpvMemorySemanticsImageMemoryMask)
2847 modes |= nir_var_image;
2848 if (semantics & SpvMemorySemanticsWorkgroupMemoryMask)
2849 modes |= nir_var_mem_shared;
2850 if (semantics & SpvMemorySemanticsCrossWorkgroupMemoryMask)
2851 modes |= nir_var_mem_global;
2852 if (semantics & SpvMemorySemanticsOutputMemoryMask) {
2853 modes |= nir_var_shader_out;
2854
2855 if (b->shader->info.stage == MESA_SHADER_TASK)
2856 modes |= nir_var_mem_task_payload;
2857 }
2858
2859 if (semantics & SpvMemorySemanticsAtomicCounterMemoryMask) {
2860 /* There's no nir_var_atomic_counter, but since atomic counters are
2861 * lowered to SSBOs, we use nir_var_mem_ssbo instead.
2862 */
2863 modes |= nir_var_mem_ssbo;
2864 }
2865
2866 return modes;
2867 }
2868
2869 mesa_scope
2870 vtn_translate_scope(struct vtn_builder *b, SpvScope scope)
2871 {
2872 switch (scope) {
2873 case SpvScopeDevice:
2874 vtn_fail_if(b->supported_capabilities.VulkanMemoryModel &&
2875 !b->supported_capabilities.VulkanMemoryModelDeviceScope,
2876 "If the Vulkan memory model is declared and any instruction "
2877 "uses Device scope, the VulkanMemoryModelDeviceScope "
2878 "capability must be declared.");
2879 return SCOPE_DEVICE;
2880
2881 case SpvScopeQueueFamily:
2882 vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
2883 "To use Queue Family scope, the VulkanMemoryModel capability "
2884 "must be declared.");
2885 return SCOPE_QUEUE_FAMILY;
2886
2887 case SpvScopeWorkgroup:
2888 return SCOPE_WORKGROUP;
2889
2890 case SpvScopeSubgroup:
2891 return SCOPE_SUBGROUP;
2892
2893 case SpvScopeInvocation:
2894 return SCOPE_INVOCATION;
2895
2896 case SpvScopeShaderCallKHR:
2897 return SCOPE_SHADER_CALL;
2898
2899 default:
2900 vtn_fail("Invalid memory scope");
2901 }
2902 }
2903
2904 static void
2905 vtn_emit_scoped_control_barrier(struct vtn_builder *b, SpvScope exec_scope,
2906 SpvScope mem_scope,
2907 SpvMemorySemanticsMask semantics)
2908 {
2909 nir_memory_semantics nir_semantics =
2910 vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2911 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2912 mesa_scope nir_exec_scope = vtn_translate_scope(b, exec_scope);
2913
2914 /* Memory semantics is optional for OpControlBarrier. */
2915 mesa_scope nir_mem_scope;
2916 if (nir_semantics == 0 || modes == 0)
2917 nir_mem_scope = SCOPE_NONE;
2918 else
2919 nir_mem_scope = vtn_translate_scope(b, mem_scope);
2920
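/* For example, OpControlBarrier Workgroup Workgroup None ends up as a pure
 * execution barrier: the memory scope is SCOPE_NONE and no memory modes are
 * set.
 */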
2921 nir_barrier(&b->nb, .execution_scope=nir_exec_scope, .memory_scope=nir_mem_scope,
2922 .memory_semantics=nir_semantics, .memory_modes=modes);
2923 }
2924
2925 void
2926 vtn_emit_memory_barrier(struct vtn_builder *b, SpvScope scope,
2927 SpvMemorySemanticsMask semantics)
2928 {
2929 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2930 nir_memory_semantics nir_semantics =
2931 vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2932
2933 /* No barrier to add. */
2934 if (nir_semantics == 0 || modes == 0)
2935 return;
2936
2937 nir_barrier(&b->nb, .memory_scope=vtn_translate_scope(b, scope),
2938 .memory_semantics=nir_semantics,
2939 .memory_modes=modes);
2940 }
2941
2942 struct vtn_ssa_value *
2943 vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
2944 {
2945 /* Always use bare types for SSA values for a couple of reasons:
2946 *
2947 * 1. Code which emits deref chains should never listen to the explicit
2948 * layout information on the SSA value if any exists. If we've
2949 * accidentally been relying on this, we want to find those bugs.
2950 *
2951 * 2. We want to be able to quickly check that an SSA value being assigned
2952 * to a SPIR-V value has the right type. Using bare types everywhere
2953 * ensures that we can pointer-compare.
2954 */
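/* For example, a value loaded from a block member declared as a row_major
 * mat4 with an explicit matrix stride still gets the plain (bare) mat4 type
 * here; the explicit layout stays on the variable and deref types.
 */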
2955 struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
2956 val->type = glsl_get_bare_type(type);
2957
2958
2959 if (!glsl_type_is_vector_or_scalar(type)) {
2960 unsigned elems = glsl_get_length(val->type);
2961 val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
2962 if (glsl_type_is_array_or_matrix(type) || glsl_type_is_cmat(type)) {
2963 const struct glsl_type *elem_type = glsl_get_array_element(type);
2964 for (unsigned i = 0; i < elems; i++)
2965 val->elems[i] = vtn_create_ssa_value(b, elem_type);
2966 } else {
2967 vtn_assert(glsl_type_is_struct_or_ifc(type));
2968 for (unsigned i = 0; i < elems; i++) {
2969 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
2970 val->elems[i] = vtn_create_ssa_value(b, elem_type);
2971 }
2972 }
2973 }
2974
2975 return val;
2976 }
2977
2978 void
2979 vtn_set_ssa_value_var(struct vtn_builder *b, struct vtn_ssa_value *ssa, nir_variable *var)
2980 {
2981 vtn_assert(glsl_type_is_cmat(var->type));
2982 vtn_assert(var->type == ssa->type);
2983 ssa->is_variable = true;
2984 ssa->var = var;
2985 }
2986
2987 static nir_tex_src
2988 vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
2989 {
2990 return nir_tex_src_for_ssa(type, vtn_get_nir_ssa(b, index));
2991 }
2992
2993 static uint32_t
2994 image_operand_arg(struct vtn_builder *b, const uint32_t *w, uint32_t count,
2995 uint32_t mask_idx, SpvImageOperandsMask op)
2996 {
2997 static const SpvImageOperandsMask ops_with_arg =
2998 SpvImageOperandsBiasMask |
2999 SpvImageOperandsLodMask |
3000 SpvImageOperandsGradMask |
3001 SpvImageOperandsConstOffsetMask |
3002 SpvImageOperandsOffsetMask |
3003 SpvImageOperandsConstOffsetsMask |
3004 SpvImageOperandsSampleMask |
3005 SpvImageOperandsMinLodMask |
3006 SpvImageOperandsMakeTexelAvailableMask |
3007 SpvImageOperandsMakeTexelVisibleMask;
3008
3009 assert(util_bitcount(op) == 1);
3010 assert(w[mask_idx] & op);
3011 assert(op & ops_with_arg);
3012
3013 uint32_t idx = util_bitcount(w[mask_idx] & (op - 1) & ops_with_arg) + 1;
3014
3015 /* Adjust indices for operands with two arguments. */
3016 static const SpvImageOperandsMask ops_with_two_args =
3017 SpvImageOperandsGradMask;
3018 idx += util_bitcount(w[mask_idx] & (op - 1) & ops_with_two_args);
3019
3020 idx += mask_idx;
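/* Worked example (hypothetical operands): if w[mask_idx] is
 * Bias | ConstOffset and op is ConstOffset, Bias contributes one preceding
 * argument word, so idx = mask_idx + 1 + 1 and the ConstOffset argument is
 * w[mask_idx + 2].
 */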
3021
3022 vtn_fail_if(idx + (op & ops_with_two_args ? 1 : 0) >= count,
3023 "Image op claims to have %s but does not enough "
3024 "following operands", spirv_imageoperands_to_string(op));
3025
3026 return idx;
3027 }
3028
3029 static void
3030 non_uniform_decoration_cb(struct vtn_builder *b,
3031 struct vtn_value *val, int member,
3032 const struct vtn_decoration *dec, void *void_ctx)
3033 {
3034 enum gl_access_qualifier *access = void_ctx;
3035 switch (dec->decoration) {
3036 case SpvDecorationNonUniformEXT:
3037 *access |= ACCESS_NON_UNIFORM;
3038 break;
3039
3040 default:
3041 break;
3042 }
3043 }
3044
3045 /* Apply SignExtend/ZeroExtend operands to get the actual result type for
3046 * image read/sample operations and source type for write operations.
3047 */
3048 static nir_alu_type
3049 get_image_type(struct vtn_builder *b, nir_alu_type type, unsigned operands)
3050 {
3051 unsigned extend_operands =
3052 operands & (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask);
3053 vtn_fail_if(nir_alu_type_get_base_type(type) == nir_type_float && extend_operands,
3054 "SignExtend/ZeroExtend used on floating-point texel type");
3055 vtn_fail_if(extend_operands ==
3056 (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask),
3057 "SignExtend and ZeroExtend both specified");
3058
3059 if (operands & SpvImageOperandsSignExtendMask)
3060 return nir_type_int | nir_alu_type_get_type_size(type);
3061 if (operands & SpvImageOperandsZeroExtendMask)
3062 return nir_type_uint | nir_alu_type_get_type_size(type);
3063
3064 return type;
3065 }
3066
3067 static void
3068 vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
3069 const uint32_t *w, unsigned count)
3070 {
3071 if (opcode == SpvOpSampledImage) {
3072 struct vtn_sampled_image si = {
3073 .image = vtn_get_image(b, w[3], NULL),
3074 .sampler = vtn_get_sampler(b, w[4]),
3075 };
3076
3077 validate_image_type_for_sampled_image(
3078 b, si.image->type,
3079 "Type of Image operand of OpSampledImage");
3080
3081 enum gl_access_qualifier access = 0;
3082 vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
3083 non_uniform_decoration_cb, &access);
3084 vtn_foreach_decoration(b, vtn_untyped_value(b, w[4]),
3085 non_uniform_decoration_cb, &access);
3086
3087 vtn_push_sampled_image(b, w[2], si, access & ACCESS_NON_UNIFORM);
3088 return;
3089 } else if (opcode == SpvOpImage) {
3090 struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
3091
3092 enum gl_access_qualifier access = 0;
3093 vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
3094 non_uniform_decoration_cb, &access);
3095
3096 vtn_push_image(b, w[2], si.image, access & ACCESS_NON_UNIFORM);
3097 return;
3098 } else if (opcode == SpvOpImageSparseTexelsResident) {
3099 nir_def *code = vtn_get_nir_ssa(b, w[3]);
3100 vtn_push_nir_ssa(b, w[2], nir_is_sparse_texels_resident(&b->nb, 1, code));
3101 return;
3102 }
3103
3104 nir_deref_instr *image = NULL, *sampler = NULL;
3105 struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
3106 if (sampled_val->type->base_type == vtn_base_type_sampled_image) {
3107 struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
3108 image = si.image;
3109 sampler = si.sampler;
3110 } else {
3111 image = vtn_get_image(b, w[3], NULL);
3112 }
3113
3114 const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image->type);
3115 const bool is_array = glsl_sampler_type_is_array(image->type);
3116 nir_alu_type dest_type = nir_type_invalid;
3117
3118 /* Figure out the base texture operation */
3119 nir_texop texop;
3120 switch (opcode) {
3121 case SpvOpImageSampleImplicitLod:
3122 case SpvOpImageSparseSampleImplicitLod:
3123 case SpvOpImageSampleDrefImplicitLod:
3124 case SpvOpImageSparseSampleDrefImplicitLod:
3125 vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_BUF &&
3126 sampler_dim != GLSL_SAMPLER_DIM_MS &&
3127 sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS);
3128 texop = nir_texop_tex;
3129 break;
3130
3131 case SpvOpImageSampleProjImplicitLod:
3132 case SpvOpImageSampleProjDrefImplicitLod:
3133 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3134 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3135 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3136 sampler_dim == GLSL_SAMPLER_DIM_RECT);
3137 vtn_assert(!is_array);
3138 texop = nir_texop_tex;
3139 break;
3140
3141 case SpvOpImageSampleExplicitLod:
3142 case SpvOpImageSparseSampleExplicitLod:
3143 case SpvOpImageSampleDrefExplicitLod:
3144 case SpvOpImageSparseSampleDrefExplicitLod:
3145 vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_BUF &&
3146 sampler_dim != GLSL_SAMPLER_DIM_MS &&
3147 sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS);
3148 texop = nir_texop_txl;
3149 break;
3150
3151 case SpvOpImageSampleProjExplicitLod:
3152 case SpvOpImageSampleProjDrefExplicitLod:
3153 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3154 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3155 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3156 sampler_dim == GLSL_SAMPLER_DIM_RECT);
3157 vtn_assert(!is_array);
3158 texop = nir_texop_txl;
3159 break;
3160
3161 case SpvOpImageFetch:
3162 case SpvOpImageSparseFetch:
3163 vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_CUBE);
3164 if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
3165 texop = nir_texop_txf_ms;
3166 } else {
3167 texop = nir_texop_txf;
3168 }
3169 break;
3170
3171 case SpvOpImageGather:
3172 case SpvOpImageSparseGather:
3173 case SpvOpImageDrefGather:
3174 case SpvOpImageSparseDrefGather:
3175 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_2D ||
3176 sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
3177 sampler_dim == GLSL_SAMPLER_DIM_RECT);
3178 texop = nir_texop_tg4;
3179 break;
3180
3181 case SpvOpImageQuerySizeLod:
3182 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3183 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3184 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3185 sampler_dim == GLSL_SAMPLER_DIM_CUBE);
3186 texop = nir_texop_txs;
3187 dest_type = nir_type_int32;
3188 break;
3189
3190 case SpvOpImageQuerySize:
3191 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3192 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3193 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3194 sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
3195 sampler_dim == GLSL_SAMPLER_DIM_RECT ||
3196 sampler_dim == GLSL_SAMPLER_DIM_MS ||
3197 sampler_dim == GLSL_SAMPLER_DIM_BUF);
3198 texop = nir_texop_txs;
3199 dest_type = nir_type_int32;
3200 break;
3201
3202 case SpvOpImageQueryLod:
3203 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3204 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3205 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3206 sampler_dim == GLSL_SAMPLER_DIM_CUBE);
3207 texop = nir_texop_lod;
3208 dest_type = nir_type_float32;
3209 break;
3210
3211 case SpvOpImageQueryLevels:
3212 /* This operation is not valid for a MS image but is present in some old
3213 * shaders. Just return 1 in those cases.
3214 */
3215 if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
3216 vtn_warn("OpImageQueryLevels 'Sampled Image' should have an MS of 0, "
3217 "but found MS of 1. Replacing query with constant value 1.");
3218 vtn_push_nir_ssa(b, w[2], nir_imm_int(&b->nb, 1));
3219 return;
3220 }
3221 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3222 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3223 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3224 sampler_dim == GLSL_SAMPLER_DIM_CUBE);
3225 texop = nir_texop_query_levels;
3226 dest_type = nir_type_int32;
3227 break;
3228
3229 case SpvOpImageQuerySamples:
3230 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS);
3231 texop = nir_texop_texture_samples;
3232 dest_type = nir_type_int32;
3233 break;
3234
3235 case SpvOpFragmentFetchAMD:
3236 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS ||
3237 sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3238 texop = nir_texop_fragment_fetch_amd;
3239 break;
3240
3241 case SpvOpFragmentMaskFetchAMD:
3242 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS ||
3243 sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3244 texop = nir_texop_fragment_mask_fetch_amd;
3245 dest_type = nir_type_uint32;
3246 break;
3247
3248 default:
3249 vtn_fail_with_opcode("Unhandled opcode", opcode);
3250 }
3251
3252 nir_tex_src srcs[10]; /* 10 should be enough */
3253 nir_tex_src *p = srcs;
3254
3255 p->src = nir_src_for_ssa(&image->def);
3256 p->src_type = nir_tex_src_texture_deref;
3257 p++;
3258
3259 switch (texop) {
3260 case nir_texop_tex:
3261 case nir_texop_txb:
3262 case nir_texop_txl:
3263 case nir_texop_txd:
3264 case nir_texop_tg4:
3265 case nir_texop_lod:
3266 vtn_fail_if(sampler == NULL,
3267 "%s requires an image of type OpTypeSampledImage",
3268 spirv_op_to_string(opcode));
3269 p->src = nir_src_for_ssa(&sampler->def);
3270 p->src_type = nir_tex_src_sampler_deref;
3271 p++;
3272 break;
3273 case nir_texop_txf:
3274 case nir_texop_txf_ms:
3275 case nir_texop_txs:
3276 case nir_texop_query_levels:
3277 case nir_texop_texture_samples:
3278 case nir_texop_samples_identical:
3279 case nir_texop_fragment_fetch_amd:
3280 case nir_texop_fragment_mask_fetch_amd:
3281 /* These don't require a sampler */
3282 break;
3283 case nir_texop_txf_ms_fb:
3284 vtn_fail("unexpected nir_texop_txf_ms_fb");
3285 break;
3286 case nir_texop_txf_ms_mcs_intel:
3287 vtn_fail("unexpected nir_texop_txf_ms_mcs_intel");
3288 break;
3289 case nir_texop_tex_prefetch:
3290 vtn_fail("unexpected nir_texop_tex_prefetch");
3291 break;
3292 case nir_texop_descriptor_amd:
3293 case nir_texop_sampler_descriptor_amd:
3294 vtn_fail("unexpected nir_texop_*descriptor_amd");
3295 break;
3296 case nir_texop_lod_bias_agx:
3297 case nir_texop_custom_border_color_agx:
3298 case nir_texop_has_custom_border_color_agx:
3299 vtn_fail("unexpected nir_texop_*_agx");
3300 break;
3301 case nir_texop_hdr_dim_nv:
3302 case nir_texop_tex_type_nv:
3303 vtn_fail("unexpected nir_texop_*_nv");
3304 break;
3305 }
3306
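/* w[1] is the result type, w[2] the result id, and w[3] the (sampled) image,
 * so the first real texturing operand, typically the coordinate, is w[4].
 */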
3307 unsigned idx = 4;
3308
3309 struct nir_def *coord;
3310 unsigned coord_components;
3311 switch (opcode) {
3312 case SpvOpImageSampleImplicitLod:
3313 case SpvOpImageSparseSampleImplicitLod:
3314 case SpvOpImageSampleExplicitLod:
3315 case SpvOpImageSparseSampleExplicitLod:
3316 case SpvOpImageSampleDrefImplicitLod:
3317 case SpvOpImageSparseSampleDrefImplicitLod:
3318 case SpvOpImageSampleDrefExplicitLod:
3319 case SpvOpImageSparseSampleDrefExplicitLod:
3320 case SpvOpImageSampleProjImplicitLod:
3321 case SpvOpImageSampleProjExplicitLod:
3322 case SpvOpImageSampleProjDrefImplicitLod:
3323 case SpvOpImageSampleProjDrefExplicitLod:
3324 case SpvOpImageFetch:
3325 case SpvOpImageSparseFetch:
3326 case SpvOpImageGather:
3327 case SpvOpImageSparseGather:
3328 case SpvOpImageDrefGather:
3329 case SpvOpImageSparseDrefGather:
3330 case SpvOpImageQueryLod:
3331 case SpvOpFragmentFetchAMD:
3332 case SpvOpFragmentMaskFetchAMD: {
3333 /* All these types have the coordinate as their first real argument */
3334 coord_components = glsl_get_sampler_dim_coordinate_components(sampler_dim);
3335
3336 if (is_array && texop != nir_texop_lod)
3337 coord_components++;
3338
3339 struct vtn_ssa_value *coord_val = vtn_ssa_value(b, w[idx++]);
3340 coord = coord_val->def;
3341 /* From the SPIR-V spec version 1.5, rev. 5:
3342 *
3343 * "Coordinate must be a scalar or vector of floating-point type. It
3344 * contains (u[, v] ... [, array layer]) as needed by the definition
3345 * of Sampled Image. It may be a vector larger than needed, but all
3346 * unused components appear after all used components."
3347 */
3348 vtn_fail_if(coord->num_components < coord_components,
3349 "Coordinate value passed has fewer components than sampler dimensionality.");
3350 p->src = nir_src_for_ssa(nir_trim_vector(&b->nb, coord, coord_components));
3351
3352 /* OpenCL allows integer sampling coordinates */
3353 if (glsl_type_is_integer(coord_val->type) &&
3354 opcode == SpvOpImageSampleExplicitLod) {
3355 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
3356 "Unless the Kernel capability is being used, the coordinate parameter "
3357 "OpImageSampleExplicitLod must be floating point.");
3358
3359 nir_def *coords[4];
3360 nir_def *f0_5 = nir_imm_float(&b->nb, 0.5);
3361 for (unsigned i = 0; i < coord_components; i++) {
3362 coords[i] = nir_i2f32(&b->nb, nir_channel(&b->nb, p->src.ssa, i));
3363
3364 if (!is_array || i != coord_components - 1)
3365 coords[i] = nir_fadd(&b->nb, coords[i], f0_5);
3366 }
3367
3368 p->src = nir_src_for_ssa(nir_vec(&b->nb, coords, coord_components));
3369 }
3370
3371 p->src_type = nir_tex_src_coord;
3372 p++;
3373 break;
3374 }
3375
3376 default:
3377 coord = NULL;
3378 coord_components = 0;
3379 break;
3380 }
3381
3382 switch (opcode) {
3383 case SpvOpImageSampleProjImplicitLod:
3384 case SpvOpImageSampleProjExplicitLod:
3385 case SpvOpImageSampleProjDrefImplicitLod:
3386 case SpvOpImageSampleProjDrefExplicitLod:
3387 /* These have the projector as the last coordinate component */
3388 p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
3389 p->src_type = nir_tex_src_projector;
3390 p++;
3391 break;
3392
3393 default:
3394 break;
3395 }
3396
3397 bool is_shadow = false;
3398 unsigned gather_component = 0;
3399 switch (opcode) {
3400 case SpvOpImageSampleDrefImplicitLod:
3401 case SpvOpImageSparseSampleDrefImplicitLod:
3402 case SpvOpImageSampleDrefExplicitLod:
3403 case SpvOpImageSparseSampleDrefExplicitLod:
3404 case SpvOpImageSampleProjDrefImplicitLod:
3405 case SpvOpImageSampleProjDrefExplicitLod:
3406 case SpvOpImageDrefGather:
3407 case SpvOpImageSparseDrefGather:
3408 /* These all have an explicit depth value as their next source */
3409 is_shadow = true;
3410 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
3411 break;
3412
3413 case SpvOpImageGather:
3414 case SpvOpImageSparseGather:
3415 /* These have a component as their next source */
3416 gather_component = vtn_constant_uint(b, w[idx++]);
3417 break;
3418
3419 default:
3420 break;
3421 }
3422
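/* The Sparse variants return a (residency code, texel) pair rather than just
 * the texel; remember that so the result can be split up after emission.
 */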
3423 bool is_sparse = false;
3424 switch (opcode) {
3425 case SpvOpImageSparseSampleImplicitLod:
3426 case SpvOpImageSparseSampleExplicitLod:
3427 case SpvOpImageSparseSampleDrefImplicitLod:
3428 case SpvOpImageSparseSampleDrefExplicitLod:
3429 case SpvOpImageSparseFetch:
3430 case SpvOpImageSparseGather:
3431 case SpvOpImageSparseDrefGather:
3432 is_sparse = true;
3433 break;
3434 default:
3435 break;
3436 }
3437
3438 /* For OpImageQuerySizeLod, we always have an LOD */
3439 if (opcode == SpvOpImageQuerySizeLod)
3440 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
3441
3442 /* For OpFragmentFetchAMD, we always have a multisample index */
3443 if (opcode == SpvOpFragmentFetchAMD)
3444 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
3445
3446 /* Now we need to handle some number of optional arguments */
3447 struct vtn_value *gather_offsets = NULL;
3448 uint32_t operands = SpvImageOperandsMaskNone;
3449 if (idx < count) {
3450 operands = w[idx];
3451
3452 if (operands & SpvImageOperandsBiasMask) {
3453 vtn_assert(texop == nir_texop_tex ||
3454 texop == nir_texop_tg4);
3455 if (texop == nir_texop_tex)
3456 texop = nir_texop_txb;
3457 uint32_t arg = image_operand_arg(b, w, count, idx,
3458 SpvImageOperandsBiasMask);
3459 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_bias);
3460 }
3461
3462 if (operands & SpvImageOperandsLodMask) {
3463 vtn_assert(texop == nir_texop_txl || texop == nir_texop_txf ||
3464 texop == nir_texop_txs || texop == nir_texop_tg4);
3465 uint32_t arg = image_operand_arg(b, w, count, idx,
3466 SpvImageOperandsLodMask);
3467 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_lod);
3468 }
3469
3470 if (operands & SpvImageOperandsGradMask) {
3471 vtn_assert(texop == nir_texop_txl);
3472 texop = nir_texop_txd;
3473 uint32_t arg = image_operand_arg(b, w, count, idx,
3474 SpvImageOperandsGradMask);
3475 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ddx);
3476 (*p++) = vtn_tex_src(b, w[arg + 1], nir_tex_src_ddy);
3477 }
3478
3479 vtn_fail_if(util_bitcount(operands & (SpvImageOperandsConstOffsetsMask |
3480 SpvImageOperandsOffsetMask |
3481 SpvImageOperandsConstOffsetMask)) > 1,
3482 "At most one of the ConstOffset, Offset, and ConstOffsets "
3483 "image operands can be used on a given instruction.");
3484
3485 if (operands & SpvImageOperandsOffsetMask) {
3486 uint32_t arg = image_operand_arg(b, w, count, idx,
3487 SpvImageOperandsOffsetMask);
3488 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3489 }
3490
3491 if (operands & SpvImageOperandsConstOffsetMask) {
3492 uint32_t arg = image_operand_arg(b, w, count, idx,
3493 SpvImageOperandsConstOffsetMask);
3494 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3495 }
3496
3497 if (operands & SpvImageOperandsConstOffsetsMask) {
3498 vtn_assert(texop == nir_texop_tg4);
3499 uint32_t arg = image_operand_arg(b, w, count, idx,
3500 SpvImageOperandsConstOffsetsMask);
3501 gather_offsets = vtn_value(b, w[arg], vtn_value_type_constant);
3502 }
3503
3504 if (operands & SpvImageOperandsSampleMask) {
3505 vtn_assert(texop == nir_texop_txf_ms);
3506 uint32_t arg = image_operand_arg(b, w, count, idx,
3507 SpvImageOperandsSampleMask);
3508 texop = nir_texop_txf_ms;
3509 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ms_index);
3510 }
3511
3512 if (operands & SpvImageOperandsMinLodMask) {
3513 vtn_assert(texop == nir_texop_tex ||
3514 texop == nir_texop_txb ||
3515 texop == nir_texop_txd);
3516 uint32_t arg = image_operand_arg(b, w, count, idx,
3517 SpvImageOperandsMinLodMask);
3518 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_min_lod);
3519 }
3520 }
3521
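/* For sparse opcodes the SPIR-V result type is a struct whose second member
 * is the texel type; use that member as the value type from here on.
 */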
3522 struct vtn_type *ret_type = vtn_get_type(b, w[1]);
3523 struct vtn_type *struct_type = NULL;
3524 if (is_sparse) {
3525 vtn_assert(glsl_type_is_struct_or_ifc(ret_type->type));
3526 struct_type = ret_type;
3527 ret_type = struct_type->members[1];
3528 }
3529
3530 nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
3531 instr->op = texop;
3532
3533 memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
3534
3535 instr->coord_components = coord_components;
3536 instr->sampler_dim = sampler_dim;
3537 instr->is_array = is_array;
3538 instr->is_shadow = is_shadow;
3539 instr->is_sparse = is_sparse;
3540 instr->is_new_style_shadow =
3541 is_shadow && glsl_get_components(ret_type->type) == 1;
3542 instr->component = gather_component;
3543
3544 /* If SpvCapabilityImageGatherBiasLodAMD is enabled, texture gather without an explicit LOD
3545 * has an implicit one (instead of using level 0).
3546 */
3547 if (texop == nir_texop_tg4 &&
3548 b->enabled_capabilities.ImageGatherBiasLodAMD &&
3549 !(operands & SpvImageOperandsLodMask)) {
3550 instr->is_gather_implicit_lod = true;
3551 }
3552
3553 /* The Vulkan spec says:
3554 *
3555 * "If an instruction loads from or stores to a resource (including
3556 * atomics and image instructions) and the resource descriptor being
3557 * accessed is not dynamically uniform, then the operand corresponding
3558 * to that resource (e.g. the pointer or sampled image operand) must be
3559 * decorated with NonUniform."
3560 *
3561 * It's very careful to specify that the exact operand must be decorated
3562 * NonUniform. The SPIR-V parser is not expected to chase through long
3563 * chains to find the NonUniform decoration. It's either right there or we
3564 * can assume it doesn't exist.
3565 */
3566 enum gl_access_qualifier access = 0;
3567 vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access);
3568
3569 if (operands & SpvImageOperandsNontemporalMask)
3570 access |= ACCESS_NON_TEMPORAL;
3571
3572 if (sampler && b->options->force_tex_non_uniform)
3573 access |= ACCESS_NON_UNIFORM;
3574
3575 if (sampled_val->propagated_non_uniform)
3576 access |= ACCESS_NON_UNIFORM;
3577
3578 if (image && (access & ACCESS_NON_UNIFORM))
3579 instr->texture_non_uniform = true;
3580
3581 if (sampler && (access & ACCESS_NON_UNIFORM))
3582 instr->sampler_non_uniform = true;
3583
3584 /* for non-query ops, get dest_type from SPIR-V return type */
3585 if (dest_type == nir_type_invalid) {
3586 /* The return type should match the image type, unless the image type is
3587 * VOID (CL image), in which case the return type dictates the sampler's
3588 * result type. */
3589 enum glsl_base_type sampler_base =
3590 glsl_get_sampler_result_type(image->type);
3591 enum glsl_base_type ret_base = glsl_get_base_type(ret_type->type);
3592 vtn_fail_if(sampler_base != ret_base && sampler_base != GLSL_TYPE_VOID,
3593 "SPIR-V return type mismatches image type. This is only valid "
3594 "for untyped images (OpenCL).");
3595 dest_type = nir_get_nir_type_for_glsl_base_type(ret_base);
3596 dest_type = get_image_type(b, dest_type, operands);
3597 }
3598
3599 instr->dest_type = dest_type;
3600
3601 nir_def_init(&instr->instr, &instr->def,
3602 nir_tex_instr_dest_size(instr), 32);
3603
3604 vtn_assert(glsl_get_vector_elements(ret_type->type) ==
3605 nir_tex_instr_result_size(instr));
3606
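/* ConstOffsets for gather opcodes must be constant, so the four offsets are
 * baked directly into the instruction's tg4_offsets array instead of being
 * added as a source.
 */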
3607 if (gather_offsets) {
3608 vtn_fail_if(gather_offsets->type->base_type != vtn_base_type_array ||
3609 gather_offsets->type->length != 4,
3610 "ConstOffsets must be an array of size four of vectors "
3611 "of two integer components");
3612
3613 struct vtn_type *vec_type = gather_offsets->type->array_element;
3614 vtn_fail_if(vec_type->base_type != vtn_base_type_vector ||
3615 vec_type->length != 2 ||
3616 !glsl_type_is_integer(vec_type->type),
3617 "ConstOffsets must be an array of size four of vectors "
3618 "of two integer components");
3619
3620 unsigned bit_size = glsl_get_bit_size(vec_type->type);
3621 for (uint32_t i = 0; i < 4; i++) {
3622 const nir_const_value *cvec =
3623 gather_offsets->constant->elements[i]->values;
3624 for (uint32_t j = 0; j < 2; j++) {
3625 switch (bit_size) {
3626 case 8: instr->tg4_offsets[i][j] = cvec[j].i8; break;
3627 case 16: instr->tg4_offsets[i][j] = cvec[j].i16; break;
3628 case 32: instr->tg4_offsets[i][j] = cvec[j].i32; break;
3629 case 64: instr->tg4_offsets[i][j] = cvec[j].i64; break;
3630 default:
3631 vtn_fail("Unsupported bit size: %u", bit_size);
3632 }
3633 }
3634 }
3635 }
3636
3637 nir_builder_instr_insert(&b->nb, &instr->instr);
3638
3639 if (is_sparse) {
3640 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3641 unsigned result_size = glsl_get_vector_elements(ret_type->type);
3642 dest->elems[0]->def = nir_channel(&b->nb, &instr->def, result_size);
3643 dest->elems[1]->def = nir_trim_vector(&b->nb, &instr->def,
3644 result_size);
3645 vtn_push_ssa_value(b, w[2], dest);
3646 } else {
3647 vtn_push_nir_ssa(b, w[2], &instr->def);
3648 }
3649 }
3650
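/* Map a SPIR-V atomic opcode onto the corresponding NIR atomic op. Increment,
 * decrement and subtract all map to iadd; fill_common_atomic_sources()
 * supplies the +1, -1 or negated data operand.
 */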
3651 static nir_atomic_op
3652 translate_atomic_op(SpvOp opcode)
3653 {
3654 switch (opcode) {
3655 case SpvOpAtomicExchange: return nir_atomic_op_xchg;
3656 case SpvOpAtomicCompareExchange: return nir_atomic_op_cmpxchg;
3657 case SpvOpAtomicCompareExchangeWeak: return nir_atomic_op_cmpxchg;
3658 case SpvOpAtomicIIncrement: return nir_atomic_op_iadd;
3659 case SpvOpAtomicIDecrement: return nir_atomic_op_iadd;
3660 case SpvOpAtomicIAdd: return nir_atomic_op_iadd;
3661 case SpvOpAtomicISub: return nir_atomic_op_iadd;
3662 case SpvOpAtomicSMin: return nir_atomic_op_imin;
3663 case SpvOpAtomicUMin: return nir_atomic_op_umin;
3664 case SpvOpAtomicSMax: return nir_atomic_op_imax;
3665 case SpvOpAtomicUMax: return nir_atomic_op_umax;
3666 case SpvOpAtomicAnd: return nir_atomic_op_iand;
3667 case SpvOpAtomicOr: return nir_atomic_op_ior;
3668 case SpvOpAtomicXor: return nir_atomic_op_ixor;
3669 case SpvOpAtomicFAddEXT: return nir_atomic_op_fadd;
3670 case SpvOpAtomicFMinEXT: return nir_atomic_op_fmin;
3671 case SpvOpAtomicFMaxEXT: return nir_atomic_op_fmax;
3672 case SpvOpAtomicFlagTestAndSet: return nir_atomic_op_cmpxchg;
3673 default:
3674 unreachable("Invalid atomic");
3675 }
3676 }
3677
3678 static void
3679 fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
3680 const uint32_t *w, nir_src *src)
3681 {
3682 const struct glsl_type *type = vtn_get_type(b, w[1])->type;
3683 unsigned bit_size = glsl_get_bit_size(type);
3684
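/* Most atomics take their data operand from w[6]. Compare-exchange supplies
 * the comparator (w[8]) before the new value (w[7]), which is the order the
 * NIR cmpxchg sources expect.
 */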
3685 switch (opcode) {
3686 case SpvOpAtomicIIncrement:
3687 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 1, bit_size));
3688 break;
3689
3690 case SpvOpAtomicIDecrement:
3691 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, bit_size));
3692 break;
3693
3694 case SpvOpAtomicISub:
3695 src[0] =
3696 nir_src_for_ssa(nir_ineg(&b->nb, vtn_get_nir_ssa(b, w[6])));
3697 break;
3698
3699 case SpvOpAtomicCompareExchange:
3700 case SpvOpAtomicCompareExchangeWeak:
3701 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[8]));
3702 src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[7]));
3703 break;
3704
3705 case SpvOpAtomicExchange:
3706 case SpvOpAtomicIAdd:
3707 case SpvOpAtomicSMin:
3708 case SpvOpAtomicUMin:
3709 case SpvOpAtomicSMax:
3710 case SpvOpAtomicUMax:
3711 case SpvOpAtomicAnd:
3712 case SpvOpAtomicOr:
3713 case SpvOpAtomicXor:
3714 case SpvOpAtomicFAddEXT:
3715 case SpvOpAtomicFMinEXT:
3716 case SpvOpAtomicFMaxEXT:
3717 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6]));
3718 break;
3719
3720 default:
3721 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3722 }
3723 }
3724
3725 static nir_def *
3726 get_image_coord(struct vtn_builder *b, uint32_t value)
3727 {
3728 nir_def *coord = vtn_get_nir_ssa(b, value);
3729 /* The image_load_store intrinsics assume a 4-dim coordinate */
3730 return nir_pad_vec4(&b->nb, coord);
3731 }
3732
3733 static void
3734 vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
3735 const uint32_t *w, unsigned count)
3736 {
3737 /* Just get this one out of the way */
3738 if (opcode == SpvOpImageTexelPointer) {
3739 struct vtn_value *val =
3740 vtn_push_value(b, w[2], vtn_value_type_image_pointer);
3741 val->image = vtn_alloc(b, struct vtn_image_pointer);
3742
3743 val->image->image = vtn_nir_deref(b, w[3]);
3744 val->image->coord = get_image_coord(b, w[4]);
3745 val->image->sample = vtn_get_nir_ssa(b, w[5]);
3746 val->image->lod = nir_imm_int(&b->nb, 0);
3747 return;
3748 }
3749
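/* For everything else, gather the image deref, coordinate, sample index and
 * LOD (where applicable) into a vtn_image_pointer; the atomic opcodes also
 * provide a memory scope and semantics.
 */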
3750 struct vtn_image_pointer image;
3751 SpvScope scope = SpvScopeInvocation;
3752 SpvMemorySemanticsMask semantics = 0;
3753 SpvImageOperandsMask operands = SpvImageOperandsMaskNone;
3754
3755 enum gl_access_qualifier access = 0;
3756
3757 struct vtn_value *res_val;
3758 switch (opcode) {
3759 case SpvOpAtomicExchange:
3760 case SpvOpAtomicCompareExchange:
3761 case SpvOpAtomicCompareExchangeWeak:
3762 case SpvOpAtomicIIncrement:
3763 case SpvOpAtomicIDecrement:
3764 case SpvOpAtomicIAdd:
3765 case SpvOpAtomicISub:
3766 case SpvOpAtomicLoad:
3767 case SpvOpAtomicSMin:
3768 case SpvOpAtomicUMin:
3769 case SpvOpAtomicSMax:
3770 case SpvOpAtomicUMax:
3771 case SpvOpAtomicAnd:
3772 case SpvOpAtomicOr:
3773 case SpvOpAtomicXor:
3774 case SpvOpAtomicFAddEXT:
3775 case SpvOpAtomicFMinEXT:
3776 case SpvOpAtomicFMaxEXT:
3777 res_val = vtn_value(b, w[3], vtn_value_type_image_pointer);
3778 image = *res_val->image;
3779 scope = vtn_constant_uint(b, w[4]);
3780 semantics = vtn_constant_uint(b, w[5]);
3781 access |= ACCESS_COHERENT;
3782 break;
3783
3784 case SpvOpAtomicStore:
3785 res_val = vtn_value(b, w[1], vtn_value_type_image_pointer);
3786 image = *res_val->image;
3787 scope = vtn_constant_uint(b, w[2]);
3788 semantics = vtn_constant_uint(b, w[3]);
3789 access |= ACCESS_COHERENT;
3790 break;
3791
3792 case SpvOpImageQuerySizeLod:
3793 res_val = vtn_untyped_value(b, w[3]);
3794 image.image = vtn_get_image(b, w[3], &access);
3795 image.coord = NULL;
3796 image.sample = NULL;
3797 image.lod = vtn_ssa_value(b, w[4])->def;
3798 break;
3799
3800 case SpvOpImageQueryFormat:
3801 case SpvOpImageQueryLevels:
3802 case SpvOpImageQueryOrder:
3803 case SpvOpImageQuerySamples:
3804 case SpvOpImageQuerySize:
3805 res_val = vtn_untyped_value(b, w[3]);
3806 image.image = vtn_get_image(b, w[3], &access);
3807 image.coord = NULL;
3808 image.sample = NULL;
3809 image.lod = NULL;
3810 break;
3811
3812 case SpvOpImageRead:
3813 case SpvOpImageSparseRead: {
3814 res_val = vtn_untyped_value(b, w[3]);
3815 image.image = vtn_get_image(b, w[3], &access);
3816 image.coord = get_image_coord(b, w[4]);
3817
3818 operands = count > 5 ? w[5] : SpvImageOperandsMaskNone;
3819
3820 if (operands & SpvImageOperandsSampleMask) {
3821 uint32_t arg = image_operand_arg(b, w, count, 5,
3822 SpvImageOperandsSampleMask);
3823 image.sample = vtn_get_nir_ssa(b, w[arg]);
3824 } else {
3825 image.sample = nir_undef(&b->nb, 1, 32);
3826 }
3827
3828 if (operands & SpvImageOperandsMakeTexelVisibleMask) {
3829 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3830 "MakeTexelVisible requires NonPrivateTexel to also be set.");
3831 uint32_t arg = image_operand_arg(b, w, count, 5,
3832 SpvImageOperandsMakeTexelVisibleMask);
3833 semantics = SpvMemorySemanticsMakeVisibleMask;
3834 scope = vtn_constant_uint(b, w[arg]);
3835 }
3836
3837 if (operands & SpvImageOperandsLodMask) {
3838 uint32_t arg = image_operand_arg(b, w, count, 5,
3839 SpvImageOperandsLodMask);
3840 image.lod = vtn_get_nir_ssa(b, w[arg]);
3841 } else {
3842 image.lod = nir_imm_int(&b->nb, 0);
3843 }
3844
3845 if (operands & SpvImageOperandsVolatileTexelMask)
3846 access |= ACCESS_VOLATILE;
3847 if (operands & SpvImageOperandsNontemporalMask)
3848 access |= ACCESS_NON_TEMPORAL;
3849
3850 break;
3851 }
3852
3853 case SpvOpImageWrite: {
3854 res_val = vtn_untyped_value(b, w[1]);
3855 image.image = vtn_get_image(b, w[1], &access);
3856 image.coord = get_image_coord(b, w[2]);
3857
3858 /* texel = w[3] */
3859
3860 operands = count > 4 ? w[4] : SpvImageOperandsMaskNone;
3861
3862 if (operands & SpvImageOperandsSampleMask) {
3863 uint32_t arg = image_operand_arg(b, w, count, 4,
3864 SpvImageOperandsSampleMask);
3865 image.sample = vtn_get_nir_ssa(b, w[arg]);
3866 } else {
3867 image.sample = nir_undef(&b->nb, 1, 32);
3868 }
3869
3870 if (operands & SpvImageOperandsMakeTexelAvailableMask) {
3871 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3872 "MakeTexelAvailable requires NonPrivateTexel to also be set.");
3873 uint32_t arg = image_operand_arg(b, w, count, 4,
3874 SpvImageOperandsMakeTexelAvailableMask);
3875 semantics = SpvMemorySemanticsMakeAvailableMask;
3876 scope = vtn_constant_uint(b, w[arg]);
3877 }
3878
3879 if (operands & SpvImageOperandsLodMask) {
3880 uint32_t arg = image_operand_arg(b, w, count, 4,
3881 SpvImageOperandsLodMask);
3882 image.lod = vtn_get_nir_ssa(b, w[arg]);
3883 } else {
3884 image.lod = nir_imm_int(&b->nb, 0);
3885 }
3886
3887 if (operands & SpvImageOperandsVolatileTexelMask)
3888 access |= ACCESS_VOLATILE;
3889 if (operands & SpvImageOperandsNontemporalMask)
3890 access |= ACCESS_NON_TEMPORAL;
3891
3892 break;
3893 }
3894
3895 default:
3896 vtn_fail_with_opcode("Invalid image opcode", opcode);
3897 }
3898
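/* Translate the opcode into the matching image_deref_* intrinsic. All of the
 * read-modify-write opcodes map to the generic atomic/atomic_swap intrinsics;
 * the specific operation is attached via nir_intrinsic_set_atomic_op() below.
 */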
3899 if (semantics & SpvMemorySemanticsVolatileMask)
3900 access |= ACCESS_VOLATILE;
3901
3902 nir_intrinsic_op op;
3903 switch (opcode) {
3904 #define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_deref_##N; break;
3905 OP(ImageQuerySize, size)
3906 OP(ImageQuerySizeLod, size)
3907 OP(ImageRead, load)
3908 OP(ImageSparseRead, sparse_load)
3909 OP(ImageWrite, store)
3910 OP(AtomicLoad, load)
3911 OP(AtomicStore, store)
3912 OP(AtomicExchange, atomic)
3913 OP(AtomicCompareExchange, atomic_swap)
3914 OP(AtomicCompareExchangeWeak, atomic_swap)
3915 OP(AtomicIIncrement, atomic)
3916 OP(AtomicIDecrement, atomic)
3917 OP(AtomicIAdd, atomic)
3918 OP(AtomicISub, atomic)
3919 OP(AtomicSMin, atomic)
3920 OP(AtomicUMin, atomic)
3921 OP(AtomicSMax, atomic)
3922 OP(AtomicUMax, atomic)
3923 OP(AtomicAnd, atomic)
3924 OP(AtomicOr, atomic)
3925 OP(AtomicXor, atomic)
3926 OP(AtomicFAddEXT, atomic)
3927 OP(AtomicFMinEXT, atomic)
3928 OP(AtomicFMaxEXT, atomic)
3929 OP(ImageQueryFormat, format)
3930 OP(ImageQueryLevels, levels)
3931 OP(ImageQueryOrder, order)
3932 OP(ImageQuerySamples, samples)
3933 #undef OP
3934 default:
3935 vtn_fail_with_opcode("Invalid image opcode", opcode);
3936 }
3937
3938 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
3939 if (nir_intrinsic_has_atomic_op(intrin))
3940 nir_intrinsic_set_atomic_op(intrin, translate_atomic_op(opcode));
3941
3942 intrin->src[0] = nir_src_for_ssa(&image.image->def);
3943 nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(image.image->type));
3944 nir_intrinsic_set_image_array(intrin,
3945 glsl_sampler_type_is_array(image.image->type));
3946
3947 switch (opcode) {
3948 case SpvOpImageQueryLevels:
3949 case SpvOpImageQuerySamples:
3950 case SpvOpImageQuerySize:
3951 case SpvOpImageQuerySizeLod:
3952 case SpvOpImageQueryFormat:
3953 case SpvOpImageQueryOrder:
3954 break;
3955 default:
3956 /* The image coordinate is always 4 components but we may not have that
3957 * many. Pad with undefs to compensate.
3958 */
3959 intrin->src[1] = nir_src_for_ssa(nir_pad_vec4(&b->nb, image.coord));
3960 intrin->src[2] = nir_src_for_ssa(image.sample);
3961 break;
3962 }
3963
3964 /* The Vulkan spec says:
3965 *
3966 * "If an instruction loads from or stores to a resource (including
3967 * atomics and image instructions) and the resource descriptor being
3968 * accessed is not dynamically uniform, then the operand corresponding
3969 * to that resource (e.g. the pointer or sampled image operand) must be
3970 * decorated with NonUniform."
3971 *
3972 * It's very careful to specify that the exact operand must be decorated
3973 * NonUniform. The SPIR-V parser is not expected to chase through long
3974 * chains to find the NonUniform decoration. It's either right there or we
3975 * can assume it doesn't exist.
3976 */
3977 vtn_foreach_decoration(b, res_val, non_uniform_decoration_cb, &access);
3978 nir_intrinsic_set_access(intrin, access);
3979
3980 switch (opcode) {
3981 case SpvOpImageQueryLevels:
3982 case SpvOpImageQuerySamples:
3983 case SpvOpImageQueryFormat:
3984 case SpvOpImageQueryOrder:
3985 /* No additional sources */
3986 break;
3987 case SpvOpImageQuerySize:
3988 intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0));
3989 break;
3990 case SpvOpImageQuerySizeLod:
3991 intrin->src[1] = nir_src_for_ssa(image.lod);
3992 break;
3993 case SpvOpAtomicLoad:
3994 case SpvOpImageRead:
3995 case SpvOpImageSparseRead:
3996 /* Only OpImageRead can support a lod parameter if
3997 * SPV_AMD_shader_image_load_store_lod is used, but the current NIR
3998 * intrinsic definition for atomics requires us to set it for
3999 * OpAtomicLoad as well.
4000 */
4001 intrin->src[3] = nir_src_for_ssa(image.lod);
4002 break;
4003 case SpvOpAtomicStore:
4004 case SpvOpImageWrite: {
4005 const uint32_t value_id = opcode == SpvOpAtomicStore ? w[4] : w[3];
4006 struct vtn_ssa_value *value = vtn_ssa_value(b, value_id);
4007 /* nir_intrinsic_image_deref_store always takes a vec4 value */
4008 assert(op == nir_intrinsic_image_deref_store);
4009 intrin->num_components = 4;
4010 intrin->src[3] = nir_src_for_ssa(nir_pad_vec4(&b->nb, value->def));
4011 /* Only OpImageWrite can support a lod parameter if
4012 * SPV_AMD_shader_image_load_store_lod is used, but the current NIR
4013 * intrinsic definition for atomics requires us to set it for
4014 * OpAtomicStore as well.
4015 */
4016 intrin->src[4] = nir_src_for_ssa(image.lod);
4017
4018 nir_alu_type src_type =
4019 get_image_type(b, nir_get_nir_type_for_glsl_type(value->type), operands);
4020 nir_intrinsic_set_src_type(intrin, src_type);
4021 break;
4022 }
4023
4024 case SpvOpAtomicCompareExchange:
4025 case SpvOpAtomicCompareExchangeWeak:
4026 case SpvOpAtomicIIncrement:
4027 case SpvOpAtomicIDecrement:
4028 case SpvOpAtomicExchange:
4029 case SpvOpAtomicIAdd:
4030 case SpvOpAtomicISub:
4031 case SpvOpAtomicSMin:
4032 case SpvOpAtomicUMin:
4033 case SpvOpAtomicSMax:
4034 case SpvOpAtomicUMax:
4035 case SpvOpAtomicAnd:
4036 case SpvOpAtomicOr:
4037 case SpvOpAtomicXor:
4038 case SpvOpAtomicFAddEXT:
4039 case SpvOpAtomicFMinEXT:
4040 case SpvOpAtomicFMaxEXT:
4041 fill_common_atomic_sources(b, opcode, w, &intrin->src[3]);
4042 break;
4043
4044 default:
4045 vtn_fail_with_opcode("Invalid image opcode", opcode);
4046 }
4047
4048 /* Image operations implicitly have the Image storage memory semantics. */
4049 semantics |= SpvMemorySemanticsImageMemoryMask;
4050
4051 SpvMemorySemanticsMask before_semantics;
4052 SpvMemorySemanticsMask after_semantics;
4053 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
4054
4055 if (before_semantics)
4056 vtn_emit_memory_barrier(b, scope, before_semantics);
4057
4058 if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) {
4059 struct vtn_type *type = vtn_get_type(b, w[1]);
4060 struct vtn_type *struct_type = NULL;
4061 if (opcode == SpvOpImageSparseRead) {
4062 vtn_assert(glsl_type_is_struct_or_ifc(type->type));
4063 struct_type = type;
4064 type = struct_type->members[1];
4065 }
4066
4067 unsigned dest_components = glsl_get_vector_elements(type->type);
4068 if (opcode == SpvOpImageSparseRead)
4069 dest_components++;
4070
4071 if (nir_intrinsic_infos[op].dest_components == 0)
4072 intrin->num_components = dest_components;
4073
4074 unsigned bit_size = glsl_get_bit_size(type->type);
4075 if (opcode == SpvOpImageQuerySize ||
4076 opcode == SpvOpImageQuerySizeLod)
4077 bit_size = MIN2(bit_size, 32);
4078
4079 nir_def_init(&intrin->instr, &intrin->def,
4080 nir_intrinsic_dest_components(intrin), bit_size);
4081
4082 nir_builder_instr_insert(&b->nb, &intrin->instr);
4083
4084 nir_def *result = nir_trim_vector(&b->nb, &intrin->def,
4085 dest_components);
4086
4087 if (opcode == SpvOpImageQuerySize ||
4088 opcode == SpvOpImageQuerySizeLod)
4089 result = nir_u2uN(&b->nb, result, glsl_get_bit_size(type->type));
4090
4091 if (opcode == SpvOpImageSparseRead) {
4092 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
4093 unsigned res_type_size = glsl_get_vector_elements(type->type);
4094 dest->elems[0]->def = nir_channel(&b->nb, result, res_type_size);
4095 if (intrin->def.bit_size != 32)
4096 dest->elems[0]->def = nir_u2u32(&b->nb, dest->elems[0]->def);
4097 dest->elems[1]->def = nir_trim_vector(&b->nb, result, res_type_size);
4098 vtn_push_ssa_value(b, w[2], dest);
4099 } else {
4100 vtn_push_nir_ssa(b, w[2], result);
4101 }
4102
4103 if (opcode == SpvOpImageRead || opcode == SpvOpImageSparseRead ||
4104 opcode == SpvOpAtomicLoad) {
4105 nir_alu_type dest_type =
4106 get_image_type(b, nir_get_nir_type_for_glsl_type(type->type), operands);
4107 nir_intrinsic_set_dest_type(intrin, dest_type);
4108 }
4109 } else {
4110 nir_builder_instr_insert(&b->nb, &intrin->instr);
4111 }
4112
4113 if (after_semantics)
4114 vtn_emit_memory_barrier(b, scope, after_semantics);
4115 }
4116
4117 static nir_intrinsic_op
4118 get_uniform_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
4119 {
4120 switch (opcode) {
4121 #define OP(S, N) case SpvOp##S: return nir_intrinsic_atomic_counter_ ##N;
4122 OP(AtomicLoad, read_deref)
4123 OP(AtomicExchange, exchange)
4124 OP(AtomicCompareExchange, comp_swap)
4125 OP(AtomicCompareExchangeWeak, comp_swap)
4126 OP(AtomicIIncrement, inc_deref)
4127 OP(AtomicIDecrement, post_dec_deref)
4128 OP(AtomicIAdd, add_deref)
4129 OP(AtomicISub, add_deref)
4130 OP(AtomicUMin, min_deref)
4131 OP(AtomicUMax, max_deref)
4132 OP(AtomicAnd, and_deref)
4133 OP(AtomicOr, or_deref)
4134 OP(AtomicXor, xor_deref)
4135 #undef OP
4136 default:
4137 /* We left the following out: AtomicStore, AtomicSMin and
4138 * AtomicSMax. Right now there are no NIR intrinsics for them. At the
4139 * moment, atomic counter support is only needed for ARB_spirv, so we
4140 * only need to support GLSL atomic counters, which are uints and don't
4141 * allow direct storage.
4142 */
4143 vtn_fail("Invalid uniform atomic");
4144 }
4145 }
4146
4147 static nir_intrinsic_op
4148 get_deref_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
4149 {
4150 switch (opcode) {
4151 case SpvOpAtomicLoad: return nir_intrinsic_load_deref;
4152 case SpvOpAtomicFlagClear:
4153 case SpvOpAtomicStore: return nir_intrinsic_store_deref;
4154 #define OP(S, N) case SpvOp##S: return nir_intrinsic_deref_##N;
4155 OP(AtomicExchange, atomic)
4156 OP(AtomicCompareExchange, atomic_swap)
4157 OP(AtomicCompareExchangeWeak, atomic_swap)
4158 OP(AtomicIIncrement, atomic)
4159 OP(AtomicIDecrement, atomic)
4160 OP(AtomicIAdd, atomic)
4161 OP(AtomicISub, atomic)
4162 OP(AtomicSMin, atomic)
4163 OP(AtomicUMin, atomic)
4164 OP(AtomicSMax, atomic)
4165 OP(AtomicUMax, atomic)
4166 OP(AtomicAnd, atomic)
4167 OP(AtomicOr, atomic)
4168 OP(AtomicXor, atomic)
4169 OP(AtomicFAddEXT, atomic)
4170 OP(AtomicFMinEXT, atomic)
4171 OP(AtomicFMaxEXT, atomic)
4172 OP(AtomicFlagTestAndSet, atomic_swap)
4173 #undef OP
4174 default:
4175 vtn_fail_with_opcode("Invalid shared atomic", opcode);
4176 }
4177 }
4178
4179 /*
4180 * Handles shared atomics, ssbo atomics and atomic counters.
4181 */
4182 static void
4183 vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
4184 const uint32_t *w, UNUSED unsigned count)
4185 {
4186 struct vtn_pointer *ptr;
4187 nir_intrinsic_instr *atomic;
4188
4189 SpvScope scope = SpvScopeInvocation;
4190 SpvMemorySemanticsMask semantics = 0;
4191 enum gl_access_qualifier access = 0;
4192
4193 switch (opcode) {
4194 case SpvOpAtomicLoad:
4195 case SpvOpAtomicExchange:
4196 case SpvOpAtomicCompareExchange:
4197 case SpvOpAtomicCompareExchangeWeak:
4198 case SpvOpAtomicIIncrement:
4199 case SpvOpAtomicIDecrement:
4200 case SpvOpAtomicIAdd:
4201 case SpvOpAtomicISub:
4202 case SpvOpAtomicSMin:
4203 case SpvOpAtomicUMin:
4204 case SpvOpAtomicSMax:
4205 case SpvOpAtomicUMax:
4206 case SpvOpAtomicAnd:
4207 case SpvOpAtomicOr:
4208 case SpvOpAtomicXor:
4209 case SpvOpAtomicFAddEXT:
4210 case SpvOpAtomicFMinEXT:
4211 case SpvOpAtomicFMaxEXT:
4212 case SpvOpAtomicFlagTestAndSet:
4213 ptr = vtn_pointer(b, w[3]);
4214 scope = vtn_constant_uint(b, w[4]);
4215 semantics = vtn_constant_uint(b, w[5]);
4216 break;
4217 case SpvOpAtomicFlagClear:
4218 case SpvOpAtomicStore:
4219 ptr = vtn_pointer(b, w[1]);
4220 scope = vtn_constant_uint(b, w[2]);
4221 semantics = vtn_constant_uint(b, w[3]);
4222 break;
4223
4224 default:
4225 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
4226 }
4227
4228 if (semantics & SpvMemorySemanticsVolatileMask)
4229 access |= ACCESS_VOLATILE;
4230
4231 /* "Uniform" here means an atomic counter uniform */
4232 if (ptr->mode == vtn_variable_mode_atomic_counter) {
4233 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
4234 nir_intrinsic_op op = get_uniform_nir_atomic_op(b, opcode);
4235 atomic = nir_intrinsic_instr_create(b->nb.shader, op);
4236 atomic->src[0] = nir_src_for_ssa(&deref->def);
4237
4238 /* SSBOs would need their index/offset initialized here. In this case we
4239 * don't need to, as that info is already stored on the ptr->var->var
4240 * nir_variable (see vtn_create_variable)
4241 */
4242
4243 switch (opcode) {
4244 case SpvOpAtomicLoad:
4245 case SpvOpAtomicExchange:
4246 case SpvOpAtomicCompareExchange:
4247 case SpvOpAtomicCompareExchangeWeak:
4248 case SpvOpAtomicIIncrement:
4249 case SpvOpAtomicIDecrement:
4250 case SpvOpAtomicIAdd:
4251 case SpvOpAtomicISub:
4252 case SpvOpAtomicSMin:
4253 case SpvOpAtomicUMin:
4254 case SpvOpAtomicSMax:
4255 case SpvOpAtomicUMax:
4256 case SpvOpAtomicAnd:
4257 case SpvOpAtomicOr:
4258 case SpvOpAtomicXor:
4259 /* Nothing: we don't need to call fill_common_atomic_sources here, as
4260 * atomic counter uniforms don't have sources
4261 */
4262 break;
4263
4264 default:
4265 unreachable("Invalid SPIR-V atomic");
4266
4267 }
4268 } else {
4269 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
4270 const struct glsl_type *deref_type = deref->type;
4271 nir_intrinsic_op op = get_deref_nir_atomic_op(b, opcode);
4272 atomic = nir_intrinsic_instr_create(b->nb.shader, op);
4273 atomic->src[0] = nir_src_for_ssa(&deref->def);
4274
4275 if (nir_intrinsic_has_atomic_op(atomic))
4276 nir_intrinsic_set_atomic_op(atomic, translate_atomic_op(opcode));
4277
4278 if (ptr->mode != vtn_variable_mode_workgroup)
4279 access |= ACCESS_COHERENT;
4280
4281 nir_intrinsic_set_access(atomic, access);
4282
4283 switch (opcode) {
4284 case SpvOpAtomicLoad:
4285 atomic->num_components = glsl_get_vector_elements(deref_type);
4286 break;
4287
4288 case SpvOpAtomicStore:
4289 atomic->num_components = glsl_get_vector_elements(deref_type);
4290 nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
4291 atomic->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4]));
4292 break;
4293
4294 case SpvOpAtomicFlagClear:
4295 atomic->num_components = 1;
4296 nir_intrinsic_set_write_mask(atomic, 1);
4297 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
4298 break;
4299 case SpvOpAtomicFlagTestAndSet:
4300 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
4301 atomic->src[2] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, 32));
4302 break;
4303 case SpvOpAtomicExchange:
4304 case SpvOpAtomicCompareExchange:
4305 case SpvOpAtomicCompareExchangeWeak:
4306 case SpvOpAtomicIIncrement:
4307 case SpvOpAtomicIDecrement:
4308 case SpvOpAtomicIAdd:
4309 case SpvOpAtomicISub:
4310 case SpvOpAtomicSMin:
4311 case SpvOpAtomicUMin:
4312 case SpvOpAtomicSMax:
4313 case SpvOpAtomicUMax:
4314 case SpvOpAtomicAnd:
4315 case SpvOpAtomicOr:
4316 case SpvOpAtomicXor:
4317 case SpvOpAtomicFAddEXT:
4318 case SpvOpAtomicFMinEXT:
4319 case SpvOpAtomicFMaxEXT:
4320 fill_common_atomic_sources(b, opcode, w, &atomic->src[1]);
4321 break;
4322
4323 default:
4324 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
4325 }
4326 }
4327
4328 /* Atomic ordering operations will implicitly apply to the atomic operation
4329 * storage class, so include that too.
4330 */
4331 semantics |= vtn_mode_to_memory_semantics(ptr->mode);
4332
4333 SpvMemorySemanticsMask before_semantics;
4334 SpvMemorySemanticsMask after_semantics;
4335 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
4336
4337 if (before_semantics)
4338 vtn_emit_memory_barrier(b, scope, before_semantics);
4339
4340 if (opcode != SpvOpAtomicStore && opcode != SpvOpAtomicFlagClear) {
4341 struct vtn_type *type = vtn_get_type(b, w[1]);
4342
4343 if (opcode == SpvOpAtomicFlagTestAndSet) {
4344 /* map atomic flag to a 32-bit atomic integer. */
4345 nir_def_init(&atomic->instr, &atomic->def, 1, 32);
4346 } else {
4347 nir_def_init(&atomic->instr, &atomic->def,
4348 glsl_get_vector_elements(type->type),
4349 glsl_get_bit_size(type->type));
4350
4351 vtn_push_nir_ssa(b, w[2], &atomic->def);
4352 }
4353 }
4354
4355 nir_builder_instr_insert(&b->nb, &atomic->instr);
4356
4357 if (opcode == SpvOpAtomicFlagTestAndSet) {
4358 vtn_push_nir_ssa(b, w[2], nir_i2b(&b->nb, &atomic->def));
4359 }
4360 if (after_semantics)
4361 vtn_emit_memory_barrier(b, scope, after_semantics);
4362 }
4363
4364 static nir_alu_instr *
4365 create_vec(struct vtn_builder *b, unsigned num_components, unsigned bit_size)
4366 {
4367 nir_op op = nir_op_vec(num_components);
4368 nir_alu_instr *vec = nir_alu_instr_create(b->shader, op);
4369 nir_def_init(&vec->instr, &vec->def, num_components, bit_size);
4370
4371 return vec;
4372 }
4373
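/* Transpose a matrix value: row i of the source becomes column i of the
 * result. The new value records the original as its own transpose, so
 * transposing it back is free.
 */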
4374 struct vtn_ssa_value *
4375 vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
4376 {
4377 if (src->transposed)
4378 return src->transposed;
4379
4380 struct vtn_ssa_value *dest =
4381 vtn_create_ssa_value(b, glsl_transposed_type(src->type));
4382
4383 for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
4384 if (glsl_type_is_vector_or_scalar(src->type)) {
4385 dest->elems[i]->def = nir_channel(&b->nb, src->def, i);
4386 } else {
4387 unsigned cols = glsl_get_matrix_columns(src->type);
4388 nir_scalar srcs[NIR_MAX_MATRIX_COLUMNS];
4389 for (unsigned j = 0; j < cols; j++) {
4390 srcs[j] = nir_get_scalar(src->elems[j]->def, i);
4391 }
4392 dest->elems[i]->def = nir_vec_scalars(&b->nb, srcs, cols);
4393 }
4394 }
4395
4396 dest->transposed = src;
4397
4398 return dest;
4399 }
4400
4401 static nir_def *
4402 vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
4403 nir_def *src0, nir_def *src1,
4404 const uint32_t *indices)
4405 {
4406 nir_alu_instr *vec = create_vec(b, num_components, src0->bit_size);
4407
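/* Per SPIR-V, a Component literal of 0xFFFFFFFF means the result component is
 * undefined, so it becomes a NIR undef; otherwise the index selects from the
 * concatenation of src0 and src1.
 */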
4408 for (unsigned i = 0; i < num_components; i++) {
4409 uint32_t index = indices[i];
4410 unsigned total_components = src0->num_components + src1->num_components;
4411 vtn_fail_if(index != 0xffffffff && index >= total_components,
4412 "OpVectorShuffle: All Component literals must either be "
4413 "FFFFFFFF or in [0, N - 1] (inclusive)");
4414
4415 if (index == 0xffffffff) {
4416 vec->src[i].src =
4417 nir_src_for_ssa(nir_undef(&b->nb, 1, src0->bit_size));
4418 } else if (index < src0->num_components) {
4419 vec->src[i].src = nir_src_for_ssa(src0);
4420 vec->src[i].swizzle[0] = index;
4421 } else {
4422 vec->src[i].src = nir_src_for_ssa(src1);
4423 vec->src[i].swizzle[0] = index - src0->num_components;
4424 }
4425 }
4426
4427 nir_builder_instr_insert(&b->nb, &vec->instr);
4428
4429 return &vec->def;
4430 }
4431
4432 /*
4433 * Concatenates a number of vectors/scalars together to produce a vector
4434 */
4435 static nir_def *
4436 vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
4437 unsigned num_srcs, nir_def **srcs)
4438 {
4439 nir_alu_instr *vec = create_vec(b, num_components, srcs[0]->bit_size);
4440
4441 /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
4442 *
4443 * "When constructing a vector, there must be at least two Constituent
4444 * operands."
4445 */
4446 vtn_assert(num_srcs >= 2);
4447
4448 unsigned dest_idx = 0;
4449 for (unsigned i = 0; i < num_srcs; i++) {
4450 nir_def *src = srcs[i];
4451 vtn_assert(dest_idx + src->num_components <= num_components);
4452 for (unsigned j = 0; j < src->num_components; j++) {
4453 vec->src[dest_idx].src = nir_src_for_ssa(src);
4454 vec->src[dest_idx].swizzle[0] = j;
4455 dest_idx++;
4456 }
4457 }
4458
4459 /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
4460 *
4461 * "When constructing a vector, the total number of components in all
4462 * the operands must equal the number of components in Result Type."
4463 */
4464 vtn_assert(dest_idx == num_components);
4465
4466 nir_builder_instr_insert(&b->nb, &vec->instr);
4467
4468 return &vec->def;
4469 }
4470
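/* Deep-copy a vtn_ssa_value tree. The nir_def pointers are shared (SSA values
 * are immutable); only the element structure is duplicated so it can be
 * modified, e.g. by OpCompositeInsert.
 */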
4471 static struct vtn_ssa_value *
4472 vtn_composite_copy(struct vtn_builder *b, struct vtn_ssa_value *src)
4473 {
4474 assert(!src->is_variable);
4475
4476 struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
4477 dest->type = src->type;
4478
4479 if (glsl_type_is_vector_or_scalar(src->type)) {
4480 dest->def = src->def;
4481 } else {
4482 unsigned elems = glsl_get_length(src->type);
4483
4484 dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
4485 for (unsigned i = 0; i < elems; i++)
4486 dest->elems[i] = vtn_composite_copy(b, src->elems[i]);
4487 }
4488
4489 return dest;
4490 }
4491
4492 static struct vtn_ssa_value *
4493 vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
4494 struct vtn_ssa_value *insert, const uint32_t *indices,
4495 unsigned num_indices)
4496 {
4497 if (glsl_type_is_cmat(src->type))
4498 return vtn_cooperative_matrix_insert(b, src, insert, indices, num_indices);
4499
4500 struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
4501
4502 struct vtn_ssa_value *cur = dest;
4503 unsigned i;
4504 for (i = 0; i < num_indices - 1; i++) {
4505 /* If we got a vector here, that means the next index will be trying to
4506 * dereference a scalar.
4507 */
4508 vtn_fail_if(glsl_type_is_vector_or_scalar(cur->type),
4509 "OpCompositeInsert has too many indices.");
4510 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4511 "All indices in an OpCompositeInsert must be in-bounds");
4512 cur = cur->elems[indices[i]];
4513 }
4514
4515 if (glsl_type_is_vector_or_scalar(cur->type)) {
4516 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4517 "All indices in an OpCompositeInsert must be in-bounds");
4518
4519 /* According to the SPIR-V spec, OpCompositeInsert may work down to
4520 * the component granularity. In that case, the last index will be
4521 * the index to insert the scalar into the vector.
4522 */
4523
4524 cur->def = nir_vector_insert_imm(&b->nb, cur->def, insert->def, indices[i]);
4525 } else {
4526 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4527 "All indices in an OpCompositeInsert must be in-bounds");
4528 cur->elems[indices[i]] = insert;
4529 }
4530
4531 return dest;
4532 }
4533
4534 static struct vtn_ssa_value *
4535 vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
4536 const uint32_t *indices, unsigned num_indices)
4537 {
4538 if (glsl_type_is_cmat(src->type))
4539 return vtn_cooperative_matrix_extract(b, src, indices, num_indices);
4540
4541 struct vtn_ssa_value *cur = src;
4542 for (unsigned i = 0; i < num_indices; i++) {
4543 if (glsl_type_is_vector_or_scalar(cur->type)) {
4544 vtn_assert(i == num_indices - 1);
4545 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4546 "All indices in an OpCompositeExtract must be in-bounds");
4547
4548 /* According to the SPIR-V spec, OpCompositeExtract may work down to
4549 * the component granularity. The last index will be the index of the
4550 * vector to extract.
4551 */
4552
4553 const struct glsl_type *scalar_type =
4554 glsl_scalar_type(glsl_get_base_type(cur->type));
4555 struct vtn_ssa_value *ret = vtn_create_ssa_value(b, scalar_type);
4556 ret->def = nir_channel(&b->nb, cur->def, indices[i]);
4557 return ret;
4558 } else {
4559 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4560 "All indices in an OpCompositeExtract must be in-bounds");
4561 cur = cur->elems[indices[i]];
4562 }
4563 }
4564
4565 return cur;
4566 }
4567
4568 static void
4569 vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
4570 const uint32_t *w, unsigned count)
4571 {
4572 struct vtn_type *type = vtn_get_type(b, w[1]);
4573 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
4574
4575 switch (opcode) {
4576 case SpvOpVectorExtractDynamic:
4577 ssa->def = nir_vector_extract(&b->nb, vtn_get_nir_ssa(b, w[3]),
4578 vtn_get_nir_ssa(b, w[4]));
4579 break;
4580
4581 case SpvOpVectorInsertDynamic:
4582 ssa->def = nir_vector_insert(&b->nb, vtn_get_nir_ssa(b, w[3]),
4583 vtn_get_nir_ssa(b, w[4]),
4584 vtn_get_nir_ssa(b, w[5]));
4585 break;
4586
4587 case SpvOpVectorShuffle:
4588 ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type),
4589 vtn_get_nir_ssa(b, w[3]),
4590 vtn_get_nir_ssa(b, w[4]),
4591 w + 5);
4592 break;
4593
4594 case SpvOpCompositeConstruct:
4595 case SpvOpCompositeConstructReplicateEXT: {
4596 unsigned elems = count - 3;
4597 assume(elems >= 1);
4598 if (type->base_type == vtn_base_type_cooperative_matrix) {
4599 vtn_assert(elems == 1);
4600 nir_deref_instr *mat = vtn_create_cmat_temporary(b, type->type, "cmat_construct");
4601 nir_cmat_construct(&b->nb, &mat->def, vtn_get_nir_ssa(b, w[3]));
4602 vtn_set_ssa_value_var(b, ssa, mat->var);
4603 } else if (glsl_type_is_vector_or_scalar(type->type)) {
4604 if (opcode == SpvOpCompositeConstructReplicateEXT) {
4605 nir_def *src = vtn_get_nir_ssa(b, w[3]);
4606 vtn_assert(glsl_get_bit_size(type->type) == src->bit_size);
4607 unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
4608 ssa->def = nir_swizzle(&b->nb, src, swiz,
4609 glsl_get_vector_elements(type->type));
4610 } else {
4611 nir_def *srcs[NIR_MAX_VEC_COMPONENTS];
4612 for (unsigned i = 0; i < elems; i++) {
4613 srcs[i] = vtn_get_nir_ssa(b, w[3 + i]);
4614 vtn_assert(glsl_get_bit_size(type->type) == srcs[i]->bit_size);
4615 }
4616 ssa->def =
4617 vtn_vector_construct(b, glsl_get_vector_elements(type->type),
4618 elems, srcs);
4619 }
4620 } else {
4621 ssa->elems = vtn_alloc_array(b, struct vtn_ssa_value *, type->length);
4622 if (opcode == SpvOpCompositeConstructReplicateEXT) {
4623 struct vtn_ssa_value *elem = vtn_ssa_value(b, w[3]);
4624 for (unsigned i = 0; i < type->length; i++)
4625 ssa->elems[i] = elem;
4626 } else {
4627 vtn_fail_if(elems != type->length,
4628 "%s has %u constituents, expected %u",
4629 spirv_op_to_string(opcode), elems, type->length);
4630 for (unsigned i = 0; i < elems; i++)
4631 ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
4632 }
4633 }
4634 break;
4635 }
4636 case SpvOpCompositeExtract:
4637 ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
4638 w + 4, count - 4);
4639 break;
4640
4641 case SpvOpCompositeInsert:
4642 ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
4643 vtn_ssa_value(b, w[3]),
4644 w + 5, count - 5);
4645 break;
4646
4647 case SpvOpCopyLogical: {
4648 ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
4649 struct vtn_type *dst_type = vtn_get_value_type(b, w[2]);
4650 vtn_assert(vtn_types_compatible(b, type, dst_type));
4651 ssa->type = glsl_get_bare_type(dst_type->type);
4652 break;
4653 }
4654 case SpvOpCopyObject:
4655 case SpvOpExpectKHR:
4656 vtn_copy_value(b, w[3], w[2]);
4657 return;
4658
4659 default:
4660 vtn_fail_with_opcode("unknown composite operation", opcode);
4661 }
4662
4663 vtn_push_ssa_value(b, w[2], ssa);
4664 }
4665
4666 static void
4667 vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
4668 const uint32_t *w, UNUSED unsigned count)
4669 {
4670 switch (opcode) {
4671 case SpvOpEmitVertex:
4672 case SpvOpEmitStreamVertex:
4673 case SpvOpEndPrimitive:
4674 case SpvOpEndStreamPrimitive: {
4675 unsigned stream = 0;
4676 if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
4677 stream = vtn_constant_uint(b, w[1]);
4678
4679 switch (opcode) {
4680 case SpvOpEmitStreamVertex:
4681 case SpvOpEmitVertex:
4682 nir_emit_vertex(&b->nb, stream);
4683 break;
4684 case SpvOpEndPrimitive:
4685 case SpvOpEndStreamPrimitive:
4686 nir_end_primitive(&b->nb, stream);
4687 break;
4688 default:
4689 unreachable("Invalid opcode");
4690 }
4691 break;
4692 }
4693
4694 case SpvOpMemoryBarrier: {
4695 SpvScope scope = vtn_constant_uint(b, w[1]);
4696 SpvMemorySemanticsMask semantics = vtn_constant_uint(b, w[2]);
4697 vtn_emit_memory_barrier(b, scope, semantics);
4698 return;
4699 }
4700
4701 case SpvOpControlBarrier: {
4702 SpvScope execution_scope = vtn_constant_uint(b, w[1]);
4703 SpvScope memory_scope = vtn_constant_uint(b, w[2]);
4704 SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]);
4705
4706 /* GLSLang, prior to commit 8297936dd6eb3, emitted OpControlBarrier with
4707 * memory semantics of None for GLSL barrier().
4708 * And before that, prior to c3f1cdfa, it emitted OpControlBarrier with
4709 * Device instead of Workgroup as the execution scope.
4710 */
4711 if (b->wa_glslang_cs_barrier &&
4712 b->nb.shader->info.stage == MESA_SHADER_COMPUTE &&
4713 (execution_scope == SpvScopeWorkgroup ||
4714 execution_scope == SpvScopeDevice) &&
4715 memory_semantics == SpvMemorySemanticsMaskNone) {
4716 execution_scope = SpvScopeWorkgroup;
4717 memory_scope = SpvScopeWorkgroup;
4718 memory_semantics = SpvMemorySemanticsAcquireReleaseMask |
4719 SpvMemorySemanticsWorkgroupMemoryMask;
4720 }
4721
4722 /* From the SPIR-V spec:
4723 *
4724 * "When used with the TessellationControl execution model, it also
4725 * implicitly synchronizes the Output Storage Class: Writes to Output
4726 * variables performed by any invocation executed prior to a
4727 * OpControlBarrier will be visible to any other invocation after
4728 * return from that OpControlBarrier."
4729 *
4730 * The same applies to VK_NV_mesh_shader.
4731 */
4732 if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL ||
4733 b->nb.shader->info.stage == MESA_SHADER_TASK ||
4734 b->nb.shader->info.stage == MESA_SHADER_MESH) {
4735 memory_semantics &= ~(SpvMemorySemanticsAcquireMask |
4736 SpvMemorySemanticsReleaseMask |
4737 SpvMemorySemanticsAcquireReleaseMask |
4738 SpvMemorySemanticsSequentiallyConsistentMask);
4739 memory_semantics |= SpvMemorySemanticsAcquireReleaseMask |
4740 SpvMemorySemanticsOutputMemoryMask;
4741 if (memory_scope == SpvScopeSubgroup || memory_scope == SpvScopeInvocation)
4742 memory_scope = SpvScopeWorkgroup;
4743 }
4744
4745 vtn_emit_scoped_control_barrier(b, execution_scope, memory_scope,
4746 memory_semantics);
4747 break;
4748 }
4749
4750 default:
4751 unreachable("unknown barrier instruction");
4752 }
4753 }
4754
4755 static enum tess_primitive_mode
4756 tess_primitive_mode_from_spv_execution_mode(struct vtn_builder *b,
4757 SpvExecutionMode mode)
4758 {
4759 switch (mode) {
4760 case SpvExecutionModeTriangles:
4761 return TESS_PRIMITIVE_TRIANGLES;
4762 case SpvExecutionModeQuads:
4763 return TESS_PRIMITIVE_QUADS;
4764 case SpvExecutionModeIsolines:
4765 return TESS_PRIMITIVE_ISOLINES;
4766 default:
4767 vtn_fail("Invalid tess primitive type: %s (%u)",
4768 spirv_executionmode_to_string(mode), mode);
4769 }
4770 }
4771
4772 static enum mesa_prim
4773 primitive_from_spv_execution_mode(struct vtn_builder *b,
4774 SpvExecutionMode mode)
4775 {
4776 switch (mode) {
4777 case SpvExecutionModeInputPoints:
4778 case SpvExecutionModeOutputPoints:
4779 return MESA_PRIM_POINTS;
4780 case SpvExecutionModeInputLines:
4781 case SpvExecutionModeOutputLinesNV:
4782 return MESA_PRIM_LINES;
4783 case SpvExecutionModeInputLinesAdjacency:
4784 return MESA_PRIM_LINES_ADJACENCY;
4785 case SpvExecutionModeTriangles:
4786 case SpvExecutionModeOutputTrianglesNV:
4787 return MESA_PRIM_TRIANGLES;
4788 case SpvExecutionModeInputTrianglesAdjacency:
4789 return MESA_PRIM_TRIANGLES_ADJACENCY;
4790 case SpvExecutionModeQuads:
4791 return MESA_PRIM_QUADS;
4792 case SpvExecutionModeOutputLineStrip:
4793 return MESA_PRIM_LINE_STRIP;
4794 case SpvExecutionModeOutputTriangleStrip:
4795 return MESA_PRIM_TRIANGLE_STRIP;
4796 default:
4797 vtn_fail("Invalid primitive type: %s (%u)",
4798 spirv_executionmode_to_string(mode), mode);
4799 }
4800 }
4801
4802 static unsigned
4803 vertices_in_from_spv_execution_mode(struct vtn_builder *b,
4804 SpvExecutionMode mode)
4805 {
4806 switch (mode) {
4807 case SpvExecutionModeInputPoints:
4808 return 1;
4809 case SpvExecutionModeInputLines:
4810 return 2;
4811 case SpvExecutionModeInputLinesAdjacency:
4812 return 4;
4813 case SpvExecutionModeTriangles:
4814 return 3;
4815 case SpvExecutionModeInputTrianglesAdjacency:
4816 return 6;
4817 default:
4818 vtn_fail("Invalid GS input mode: %s (%u)",
4819 spirv_executionmode_to_string(mode), mode);
4820 }
4821 }
4822
4823 gl_shader_stage
4824 vtn_stage_for_execution_model(SpvExecutionModel model)
4825 {
4826 switch (model) {
4827 case SpvExecutionModelVertex:
4828 return MESA_SHADER_VERTEX;
4829 case SpvExecutionModelTessellationControl:
4830 return MESA_SHADER_TESS_CTRL;
4831 case SpvExecutionModelTessellationEvaluation:
4832 return MESA_SHADER_TESS_EVAL;
4833 case SpvExecutionModelGeometry:
4834 return MESA_SHADER_GEOMETRY;
4835 case SpvExecutionModelFragment:
4836 return MESA_SHADER_FRAGMENT;
4837 case SpvExecutionModelGLCompute:
4838 return MESA_SHADER_COMPUTE;
4839 case SpvExecutionModelKernel:
4840 return MESA_SHADER_KERNEL;
4841 case SpvExecutionModelRayGenerationKHR:
4842 return MESA_SHADER_RAYGEN;
4843 case SpvExecutionModelAnyHitKHR:
4844 return MESA_SHADER_ANY_HIT;
4845 case SpvExecutionModelClosestHitKHR:
4846 return MESA_SHADER_CLOSEST_HIT;
4847 case SpvExecutionModelMissKHR:
4848 return MESA_SHADER_MISS;
4849 case SpvExecutionModelIntersectionKHR:
4850 return MESA_SHADER_INTERSECTION;
4851 case SpvExecutionModelCallableKHR:
4852 return MESA_SHADER_CALLABLE;
4853 case SpvExecutionModelTaskNV:
4854 case SpvExecutionModelTaskEXT:
4855 return MESA_SHADER_TASK;
4856 case SpvExecutionModelMeshNV:
4857 case SpvExecutionModelMeshEXT:
4858 return MESA_SHADER_MESH;
4859 default:
4860 return MESA_SHADER_NONE;
4861 }
4862 }
4863
4864 void
4865 vtn_handle_entry_point(struct vtn_builder *b, const uint32_t *w,
4866 unsigned count)
4867 {
4868 struct vtn_value *entry_point = &b->values[w[2]];
4869 /* Record the name even if this turns out not to be the entry point we want. */
4870 unsigned name_words;
4871 entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
4872 entry_point->is_entrypoint = true;
4873
4874 gl_shader_stage stage = vtn_stage_for_execution_model(w[1]);
4875 vtn_fail_if(stage == MESA_SHADER_NONE,
4876 "Unsupported execution model: %s (%u)",
4877 spirv_executionmodel_to_string(w[1]), w[1]);
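   /* Only translate the entry point the caller asked for, matched by both
    * name and stage; every other OpEntryPoint is ignored.
    */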
4878 if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
4879 stage != b->entry_point_stage)
4880 return;
4881
4882 vtn_assert(b->entry_point == NULL);
4883 b->entry_point = entry_point;
4884
4885 /* Entry points enumerate which global variables are used. */
4886 size_t start = 3 + name_words;
4887 b->interface_ids_count = count - start;
4888 b->interface_ids = vtn_alloc_array(b, uint32_t, b->interface_ids_count);
4889 memcpy(b->interface_ids, &w[start], b->interface_ids_count * 4);
4890 qsort(b->interface_ids, b->interface_ids_count, 4, cmp_uint32_t);
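   /* They are kept sorted so that later variable handling can presumably
    * look them up with bsearch using the same comparator.
    */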
4891 }
4892
4893 static bool
4894 vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
4895 const uint32_t *w, unsigned count)
4896 {
4897 switch (opcode) {
4898 case SpvOpString:
4899 case SpvOpSource:
4900 case SpvOpSourceExtension:
4901 case SpvOpSourceContinued:
4902 case SpvOpModuleProcessed:
4903 vtn_handle_debug_text(b, opcode, w, count);
4904 break;
4905
4906 case SpvOpExtension: {
4907 /* Implementing both NV_mesh_shader and EXT_mesh_shader
4908 * is difficult without knowing which we're dealing with.
4909 * TODO: remove this when we stop supporting NV_mesh_shader.
4910 */
4911 const char *ext_name = (const char *)&w[1];
4912 if (strcmp(ext_name, "SPV_NV_mesh_shader") == 0)
4913 b->shader->info.mesh.nv = true;
4914 break;
4915 }
4916
4917 case SpvOpCapability: {
4918 SpvCapability cap = w[1];
4919 switch (cap) {
4920 case SpvCapabilitySubgroupDispatch:
4921 /* Missing:
4922 * - SpvOpGetKernelLocalSizeForSubgroupCount
4923 * - SpvOpGetKernelMaxNumSubgroups
4924 */
4925 vtn_warn("Not fully supported capability: %s",
4926 spirv_capability_to_string(cap));
4927 break;
4928
4929 case SpvCapabilityOptNoneEXT:
4930 /* This is a "strong request" not to optimize a function, usually
4931  * because it's a compute shader whose workgroup size etc. has been
4932  * manually tuned and we shouldn't risk undoing that. Someday!
4933 */
4934 vtn_info("Not fully supported capability: %s",
4935 spirv_capability_to_string(cap));
4936 break;
4937
4938 default:
4939 vtn_fail_if(!spirv_capabilities_get(&implemented_capabilities, cap),
4940 "Unimplemented SPIR-V capability: %s (%u)",
4941 spirv_capability_to_string(cap), cap);
4942 }
4943
4944 if (!spirv_capabilities_get(&b->supported_capabilities, cap)) {
4945 vtn_warn("Unsupported SPIR-V capability: %s (%u)",
4946 spirv_capability_to_string(cap), cap);
4947 }
4948
4949 spirv_capabilities_set(&b->enabled_capabilities, cap, true);
4950 break;
4951 }
4952
4953 case SpvOpExtInstImport:
4954 vtn_handle_extension(b, opcode, w, count);
4955 break;
4956
4957 case SpvOpMemoryModel:
4958 switch (w[1]) {
4959 case SpvAddressingModelPhysical32:
4960 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
4961 "AddressingModelPhysical32 only supported for kernels");
4962 b->shader->info.cs.ptr_size = 32;
4963 b->physical_ptrs = true;
4964 assert(nir_address_format_bit_size(b->options->global_addr_format) == 32);
4965 assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
4966 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 32);
4967 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
4968 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 32);
4969 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
4970 break;
4971 case SpvAddressingModelPhysical64:
4972 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
4973 "AddressingModelPhysical64 only supported for kernels");
4974 b->shader->info.cs.ptr_size = 64;
4975 b->physical_ptrs = true;
4976 assert(nir_address_format_bit_size(b->options->global_addr_format) == 64);
4977 assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
4978 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 64);
4979 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
4980 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 64);
4981 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
4982 break;
4983 case SpvAddressingModelLogical:
4984 vtn_fail_if(b->shader->info.stage == MESA_SHADER_KERNEL,
4985 "AddressingModelLogical only supported for shaders");
4986 b->physical_ptrs = false;
4987 break;
4988 case SpvAddressingModelPhysicalStorageBuffer64:
4989 vtn_fail_if(!b->supported_capabilities.PhysicalStorageBufferAddresses,
4990 "AddressingModelPhysicalStorageBuffer64 not supported");
4991 break;
4992 default:
4993 vtn_fail("Unknown addressing model: %s (%u)",
4994 spirv_addressingmodel_to_string(w[1]), w[1]);
4995 break;
4996 }
4997
4998 b->mem_model = w[2];
4999 switch (w[2]) {
5000 case SpvMemoryModelSimple:
5001 case SpvMemoryModelGLSL450:
5002 case SpvMemoryModelOpenCL:
5003 break;
5004 case SpvMemoryModelVulkan:
5005 vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
5006 "Vulkan memory model is unsupported by this driver");
5007 break;
5008 default:
5009 vtn_fail("Unsupported memory model: %s",
5010 spirv_memorymodel_to_string(w[2]));
5011 break;
5012 }
5013 break;
5014
5015 case SpvOpEntryPoint:
5016 vtn_handle_entry_point(b, w, count);
5017 break;
5018
5019 case SpvOpName:
5020 b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
5021 break;
5022
5023 case SpvOpMemberName:
5024 case SpvOpExecutionMode:
5025 case SpvOpExecutionModeId:
5026 case SpvOpDecorationGroup:
5027 case SpvOpDecorate:
5028 case SpvOpDecorateId:
5029 case SpvOpMemberDecorate:
5030 case SpvOpGroupDecorate:
5031 case SpvOpGroupMemberDecorate:
5032 case SpvOpDecorateString:
5033 case SpvOpMemberDecorateString:
5034 vtn_handle_decoration(b, opcode, w, count);
5035 break;
5036
5037 case SpvOpExtInst:
5038 case SpvOpExtInstWithForwardRefsKHR: {
5039 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5040 if (val->ext_handler == vtn_handle_non_semantic_instruction) {
5041 /* NonSemantic extended instructions are acceptable in preamble. */
5042 vtn_handle_non_semantic_instruction(b, w[4], w, count);
5043 return true;
5044 } else {
5045 return false; /* End of preamble. */
5046 }
5047 }
5048
5049 default:
5050 return false; /* End of preamble */
5051 }
5052
5053 return true;
5054 }
5055
5056 void
5057 vtn_handle_debug_text(struct vtn_builder *b, SpvOp opcode,
5058 const uint32_t *w, unsigned count)
5059 {
5060 switch (opcode) {
5061 case SpvOpString:
5062 vtn_push_value(b, w[1], vtn_value_type_string)->str =
5063 vtn_string_literal(b, &w[2], count - 2, NULL);
5064 break;
5065
5066 case SpvOpSource: {
5067 const char *lang;
5068 switch (w[1]) {
5069 default:
5070 case SpvSourceLanguageUnknown: lang = "unknown"; break;
5071 case SpvSourceLanguageESSL: lang = "ESSL"; break;
5072 case SpvSourceLanguageGLSL: lang = "GLSL"; break;
5073 case SpvSourceLanguageOpenCL_C: lang = "OpenCL C"; break;
5074 case SpvSourceLanguageOpenCL_CPP: lang = "OpenCL C++"; break;
5075 case SpvSourceLanguageHLSL: lang = "HLSL"; break;
5076 }
5077
5078 uint32_t version = w[2];
5079
5080 const char *file =
5081 (count > 3) ? vtn_value(b, w[3], vtn_value_type_string)->str : "";
5082
5083 vtn_info("Parsing SPIR-V from %s %u source file %s", lang, version, file);
5084
5085 b->source_lang = w[1];
5086 break;
5087 }
5088
5089 case SpvOpSourceExtension:
5090 case SpvOpSourceContinued:
5091 case SpvOpModuleProcessed:
5092 /* Unhandled, but these are for debug so that's ok. */
5093 break;
5094
5095 default:
5096 unreachable("Unhandled opcode");
5097 }
5098 }
5099
5100 static void
5101 vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
5102 const struct vtn_decoration *mode, UNUSED void *data)
5103 {
5104 vtn_assert(b->entry_point == entry_point);
5105
5106 switch(mode->exec_mode) {
5107 case SpvExecutionModeOriginUpperLeft:
5108 case SpvExecutionModeOriginLowerLeft:
5109 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5110 b->shader->info.fs.origin_upper_left =
5111 (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
5112 break;
5113
5114 case SpvExecutionModeEarlyFragmentTests:
5115 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5116 b->shader->info.fs.early_fragment_tests = true;
5117 break;
5118
5119 case SpvExecutionModePostDepthCoverage:
5120 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5121 b->shader->info.fs.post_depth_coverage = true;
5122 break;
5123
5124 case SpvExecutionModeInvocations:
5125 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5126 b->shader->info.gs.invocations = MAX2(1, mode->operands[0]);
5127 break;
5128
5129 case SpvExecutionModeDepthReplacing:
5130 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5131 if (b->shader->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE)
5132 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
5133 break;
5134 case SpvExecutionModeDepthGreater:
5135 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5136 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
5137 break;
5138 case SpvExecutionModeDepthLess:
5139 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5140 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
5141 break;
5142 case SpvExecutionModeDepthUnchanged:
5143 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5144 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
5145 break;
5146
5147 case SpvExecutionModeLocalSizeHint:
5148 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5149 b->shader->info.cs.workgroup_size_hint[0] = mode->operands[0];
5150 b->shader->info.cs.workgroup_size_hint[1] = mode->operands[1];
5151 b->shader->info.cs.workgroup_size_hint[2] = mode->operands[2];
5152 break;
5153
5154 case SpvExecutionModeLocalSize:
5155 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5156 b->shader->info.workgroup_size[0] = mode->operands[0];
5157 b->shader->info.workgroup_size[1] = mode->operands[1];
5158 b->shader->info.workgroup_size[2] = mode->operands[2];
5159 } else {
5160 vtn_fail("Execution mode LocalSize not supported in stage %s",
5161 _mesa_shader_stage_to_string(b->shader->info.stage));
5162 }
5163 break;
5164
5165 case SpvExecutionModeOutputVertices:
5166 switch (b->shader->info.stage) {
5167 case MESA_SHADER_TESS_CTRL:
5168 case MESA_SHADER_TESS_EVAL:
5169 b->shader->info.tess.tcs_vertices_out = mode->operands[0];
5170 break;
5171 case MESA_SHADER_GEOMETRY:
5172 b->shader->info.gs.vertices_out = mode->operands[0];
5173 break;
5174 case MESA_SHADER_MESH:
5175 b->shader->info.mesh.max_vertices_out = mode->operands[0];
5176 break;
5177 default:
5178 vtn_fail("Execution mode OutputVertices not supported in stage %s",
5179 _mesa_shader_stage_to_string(b->shader->info.stage));
5180 break;
5181 }
5182 break;
5183
5184 case SpvExecutionModeInputPoints:
5185 case SpvExecutionModeInputLines:
5186 case SpvExecutionModeInputLinesAdjacency:
5187 case SpvExecutionModeTriangles:
5188 case SpvExecutionModeInputTrianglesAdjacency:
5189 case SpvExecutionModeQuads:
5190 case SpvExecutionModeIsolines:
5191 if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5192 b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
5193 b->shader->info.tess._primitive_mode =
5194 tess_primitive_mode_from_spv_execution_mode(b, mode->exec_mode);
5195 } else {
5196 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5197 b->shader->info.gs.vertices_in =
5198 vertices_in_from_spv_execution_mode(b, mode->exec_mode);
5199 b->shader->info.gs.input_primitive =
5200 primitive_from_spv_execution_mode(b, mode->exec_mode);
5201 }
5202 break;
5203
5204 case SpvExecutionModeOutputPrimitivesNV:
5205 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5206 b->shader->info.mesh.max_primitives_out = mode->operands[0];
5207 break;
5208
5209 case SpvExecutionModeOutputLinesNV:
5210 case SpvExecutionModeOutputTrianglesNV:
5211 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5212 b->shader->info.mesh.primitive_type =
5213 primitive_from_spv_execution_mode(b, mode->exec_mode);
5214 break;
5215
5216 case SpvExecutionModeOutputPoints: {
5217 const unsigned primitive =
5218 primitive_from_spv_execution_mode(b, mode->exec_mode);
5219
5220 switch (b->shader->info.stage) {
5221 case MESA_SHADER_GEOMETRY:
5222 b->shader->info.gs.output_primitive = primitive;
5223 break;
5224 case MESA_SHADER_MESH:
5225 b->shader->info.mesh.primitive_type = primitive;
5226 break;
5227 default:
5228 vtn_fail("Execution mode OutputPoints not supported in stage %s",
5229 _mesa_shader_stage_to_string(b->shader->info.stage));
5230 break;
5231 }
5232 break;
5233 }
5234
5235 case SpvExecutionModeOutputLineStrip:
5236 case SpvExecutionModeOutputTriangleStrip:
5237 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5238 b->shader->info.gs.output_primitive =
5239 primitive_from_spv_execution_mode(b, mode->exec_mode);
5240 break;
5241
5242 case SpvExecutionModeSpacingEqual:
5243 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5244 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5245 b->shader->info.tess.spacing = TESS_SPACING_EQUAL;
5246 break;
5247 case SpvExecutionModeSpacingFractionalEven:
5248 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5249 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5250 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_EVEN;
5251 break;
5252 case SpvExecutionModeSpacingFractionalOdd:
5253 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5254 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5255 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_ODD;
5256 break;
5257 case SpvExecutionModeVertexOrderCw:
5258 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5259 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5260 b->shader->info.tess.ccw = false;
5261 break;
5262 case SpvExecutionModeVertexOrderCcw:
5263 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5264 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5265 b->shader->info.tess.ccw = true;
5266 break;
5267 case SpvExecutionModePointMode:
5268 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5269 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5270 b->shader->info.tess.point_mode = true;
5271 break;
5272
5273 case SpvExecutionModePixelCenterInteger:
5274 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5275 b->shader->info.fs.pixel_center_integer = true;
5276 break;
5277
5278 case SpvExecutionModeXfb:
5279 b->shader->info.has_transform_feedback_varyings = true;
5280 break;
5281
5282 case SpvExecutionModeVecTypeHint:
5283 break; /* OpenCL */
5284
5285 case SpvExecutionModeContractionOff:
5286 if (b->shader->info.stage != MESA_SHADER_KERNEL)
5287 vtn_warn("ExecutionMode only allowed for CL-style kernels: %s",
5288 spirv_executionmode_to_string(mode->exec_mode));
5289 else
5290 b->exact = true;
5291 break;
5292
5293 case SpvExecutionModeStencilRefReplacingEXT:
5294 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5295 break;
5296
5297 case SpvExecutionModeDerivativeGroupQuadsKHR:
5298 vtn_assert(gl_shader_stage_uses_workgroup(b->shader->info.stage));
5299 b->shader->info.derivative_group = DERIVATIVE_GROUP_QUADS;
5300 break;
5301
5302 case SpvExecutionModeDerivativeGroupLinearKHR:
5303 vtn_assert(gl_shader_stage_uses_workgroup(b->shader->info.stage));
5304 b->shader->info.derivative_group = DERIVATIVE_GROUP_LINEAR;
5305 break;
5306
5307 case SpvExecutionModePixelInterlockOrderedEXT:
5308 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5309 b->shader->info.fs.pixel_interlock_ordered = true;
5310 break;
5311
5312 case SpvExecutionModePixelInterlockUnorderedEXT:
5313 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5314 b->shader->info.fs.pixel_interlock_unordered = true;
5315 break;
5316
5317 case SpvExecutionModeSampleInterlockOrderedEXT:
5318 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5319 b->shader->info.fs.sample_interlock_ordered = true;
5320 break;
5321
5322 case SpvExecutionModeSampleInterlockUnorderedEXT:
5323 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5324 b->shader->info.fs.sample_interlock_unordered = true;
5325 break;
5326
5327 case SpvExecutionModeDenormPreserve:
5328 case SpvExecutionModeDenormFlushToZero:
5329 case SpvExecutionModeSignedZeroInfNanPreserve:
5330 case SpvExecutionModeRoundingModeRTE:
5331 case SpvExecutionModeRoundingModeRTZ: {
5332 unsigned execution_mode = 0;
5333 switch (mode->exec_mode) {
5334 case SpvExecutionModeDenormPreserve:
5335 switch (mode->operands[0]) {
5336 case 16: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP16; break;
5337 case 32: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP32; break;
5338 case 64: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP64; break;
5339 default: vtn_fail("Floating point type not supported");
5340 }
5341 break;
5342 case SpvExecutionModeDenormFlushToZero:
5343 switch (mode->operands[0]) {
5344 case 16: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16; break;
5345 case 32: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32; break;
5346 case 64: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64; break;
5347 default: vtn_fail("Floating point type not supported");
5348 }
5349 break;
5350 case SpvExecutionModeSignedZeroInfNanPreserve:
5351 switch (mode->operands[0]) {
5352 case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break;
5353 case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break;
5354 case 64: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break;
5355 default: vtn_fail("Floating point type not supported");
5356 }
5357 break;
5358 case SpvExecutionModeRoundingModeRTE:
5359 switch (mode->operands[0]) {
5360 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16; break;
5361 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32; break;
5362 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64; break;
5363 default: vtn_fail("Floating point type not supported");
5364 }
5365 break;
5366 case SpvExecutionModeRoundingModeRTZ:
5367 switch (mode->operands[0]) {
5368 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16; break;
5369 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32; break;
5370 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; break;
5371 default: vtn_fail("Floating point type not supported");
5372 }
5373 break;
5374 default:
5375 break;
5376 }
5377
5378 b->shader->info.float_controls_execution_mode |= execution_mode;
5379
5380 for (unsigned bit_size = 16; bit_size <= 64; bit_size *= 2) {
5381 vtn_fail_if(nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, bit_size) &&
5382 nir_is_denorm_preserve(b->shader->info.float_controls_execution_mode, bit_size),
5383 "Cannot flush to zero and preserve denorms for the same bit size.");
5384 vtn_fail_if(nir_is_rounding_mode_rtne(b->shader->info.float_controls_execution_mode, bit_size) &&
5385 nir_is_rounding_mode_rtz(b->shader->info.float_controls_execution_mode, bit_size),
5386 "Cannot set rounding mode to RTNE and RTZ for the same bit size.");
5387 }
5388 break;
5389 }
5390
5391 case SpvExecutionModeMaximallyReconvergesKHR:
5392 b->shader->info.maximally_reconverges = true;
5393 break;
5394
5395 case SpvExecutionModeLocalSizeId:
5396 case SpvExecutionModeLocalSizeHintId:
5397 case SpvExecutionModeSubgroupsPerWorkgroupId:
5398 case SpvExecutionModeFPFastMathDefault:
5399 case SpvExecutionModeMaxNodeRecursionAMDX:
5400 case SpvExecutionModeStaticNumWorkgroupsAMDX:
5401 case SpvExecutionModeMaxNumWorkgroupsAMDX:
5402 case SpvExecutionModeShaderIndexAMDX:
5403 /* Handled later by vtn_handle_execution_mode_id(). */
5404 break;
5405
5406 case SpvExecutionModeSubgroupSize:
5407 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5408 vtn_assert(b->shader->info.subgroup_size == SUBGROUP_SIZE_VARYING);
5409 b->shader->info.subgroup_size = mode->operands[0];
5410 break;
5411
5412 case SpvExecutionModeSubgroupsPerWorkgroup:
5413 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5414 b->shader->info.num_subgroups = mode->operands[0];
5415 break;
5416
5417 case SpvExecutionModeSubgroupUniformControlFlowKHR:
5418 /* Nothing to do here */
5419 break;
5420
5421 case SpvExecutionModeEarlyAndLateFragmentTestsAMD:
5422 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5423 b->shader->info.fs.early_and_late_fragment_tests = true;
5424 break;
5425
5426 case SpvExecutionModeStencilRefGreaterFrontAMD:
5427 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5428 b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_GREATER;
5429 break;
5430
5431 case SpvExecutionModeStencilRefLessFrontAMD:
5432 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5433 b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_LESS;
5434 break;
5435
5436 case SpvExecutionModeStencilRefUnchangedFrontAMD:
5437 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5438 b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_UNCHANGED;
5439 break;
5440
5441 case SpvExecutionModeStencilRefGreaterBackAMD:
5442 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5443 b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_GREATER;
5444 break;
5445
5446 case SpvExecutionModeStencilRefLessBackAMD:
5447 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5448 b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_LESS;
5449 break;
5450
5451 case SpvExecutionModeStencilRefUnchangedBackAMD:
5452 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5453 b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_UNCHANGED;
5454 break;
5455
5456 case SpvExecutionModeRequireFullQuadsKHR:
5457 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5458 b->shader->info.fs.require_full_quads = true;
5459 break;
5460
5461 case SpvExecutionModeQuadDerivativesKHR:
5462 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5463 b->shader->info.fs.quad_derivatives = true;
5464 break;
5465
5466 case SpvExecutionModeCoalescingAMDX:
5467 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5468 b->shader->info.cs.workgroup_count[0] = 1;
5469 b->shader->info.cs.workgroup_count[1] = 1;
5470 b->shader->info.cs.workgroup_count[2] = 1;
5471 break;
5472
5473 default:
5474 vtn_fail("Unhandled execution mode: %s (%u)",
5475 spirv_executionmode_to_string(mode->exec_mode),
5476 mode->exec_mode);
5477 }
5478 }
5479
5480 static void
5481 vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_point,
5482 const struct vtn_decoration *mode, UNUSED void *data)
5483 {
5484
5485 vtn_assert(b->entry_point == entry_point);
5486
5487 switch (mode->exec_mode) {
5488 case SpvExecutionModeLocalSizeId:
5489 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5490 b->shader->info.workgroup_size[0] = vtn_constant_uint(b, mode->operands[0]);
5491 b->shader->info.workgroup_size[1] = vtn_constant_uint(b, mode->operands[1]);
5492 b->shader->info.workgroup_size[2] = vtn_constant_uint(b, mode->operands[2]);
5493 } else {
5494 vtn_fail("Execution mode LocalSizeId not supported in stage %s",
5495 _mesa_shader_stage_to_string(b->shader->info.stage));
5496 }
5497 break;
5498
5499 case SpvExecutionModeLocalSizeHintId:
5500 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5501 b->shader->info.cs.workgroup_size_hint[0] = vtn_constant_uint(b, mode->operands[0]);
5502 b->shader->info.cs.workgroup_size_hint[1] = vtn_constant_uint(b, mode->operands[1]);
5503 b->shader->info.cs.workgroup_size_hint[2] = vtn_constant_uint(b, mode->operands[2]);
5504 break;
5505
5506 case SpvExecutionModeSubgroupsPerWorkgroupId:
5507 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5508 b->shader->info.num_subgroups = vtn_constant_uint(b, mode->operands[0]);
5509 break;
5510
5511 case SpvExecutionModeFPFastMathDefault: {
5512 struct vtn_type *type = vtn_get_type(b, mode->operands[0]);
5513 SpvFPFastMathModeMask flags = vtn_constant_uint(b, mode->operands[1]);
5514
5515 SpvFPFastMathModeMask can_fast_math =
5516 SpvFPFastMathModeAllowRecipMask |
5517 SpvFPFastMathModeAllowContractMask |
5518 SpvFPFastMathModeAllowReassocMask |
5519 SpvFPFastMathModeAllowTransformMask;
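    /* Unless the module grants all of the aggressive-math permissions above,
     * conservatively mark every float operation as exact.
     */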
5520 if ((flags & can_fast_math) != can_fast_math)
5521 b->exact = true;
5522
5523 unsigned execution_mode = 0;
5524 if (!(flags & SpvFPFastMathModeNotNaNMask)) {
5525 switch (glsl_get_bit_size(type->type)) {
5526 case 16: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP16; break;
5527 case 32: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP32; break;
5528 case 64: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP64; break;
5529 }
5530 }
5531 if (!(flags & SpvFPFastMathModeNotInfMask)) {
5532 switch (glsl_get_bit_size(type->type)) {
5533 case 16: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP16; break;
5534 case 32: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP32; break;
5535 case 64: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP64; break;
5536 }
5537 }
5538 if (!(flags & SpvFPFastMathModeNSZMask)) {
5539 switch (glsl_get_bit_size(type->type)) {
5540 case 16: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16; break;
5541 case 32: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32; break;
5542 case 64: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64; break;
5543 }
5544 }
5545 b->shader->info.float_controls_execution_mode |= execution_mode;
5546 break;
5547 }
5548
5549 case SpvExecutionModeMaxNodeRecursionAMDX:
5550 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5551 break;
5552
5553 case SpvExecutionModeStaticNumWorkgroupsAMDX:
5554 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5555 b->shader->info.cs.workgroup_count[0] = vtn_constant_uint(b, mode->operands[0]);
5556 b->shader->info.cs.workgroup_count[1] = vtn_constant_uint(b, mode->operands[1]);
5557 b->shader->info.cs.workgroup_count[2] = vtn_constant_uint(b, mode->operands[2]);
5558 assert(b->shader->info.cs.workgroup_count[0]);
5559 assert(b->shader->info.cs.workgroup_count[1]);
5560 assert(b->shader->info.cs.workgroup_count[2]);
5561 break;
5562
5563 case SpvExecutionModeMaxNumWorkgroupsAMDX:
5564 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5565 break;
5566
5567 case SpvExecutionModeShaderIndexAMDX:
5568 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5569 b->shader->info.cs.shader_index = vtn_constant_uint(b, mode->operands[0]);
5570 break;
5571
5572 default:
5573 /* Nothing to do. Literal execution modes already handled by
5574 * vtn_handle_execution_mode(). */
5575 break;
5576 }
5577 }
5578
5579 static bool
5580 vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
5581 const uint32_t *w, unsigned count)
5582 {
5583 vtn_set_instruction_result_type(b, opcode, w, count);
5584
5585 switch (opcode) {
5586 case SpvOpSource:
5587 case SpvOpSourceContinued:
5588 case SpvOpSourceExtension:
5589 case SpvOpExtension:
5590 case SpvOpCapability:
5591 case SpvOpExtInstImport:
5592 case SpvOpMemoryModel:
5593 case SpvOpEntryPoint:
5594 case SpvOpExecutionMode:
5595 case SpvOpString:
5596 case SpvOpName:
5597 case SpvOpMemberName:
5598 case SpvOpDecorationGroup:
5599 case SpvOpDecorate:
5600 case SpvOpDecorateId:
5601 case SpvOpMemberDecorate:
5602 case SpvOpGroupDecorate:
5603 case SpvOpGroupMemberDecorate:
5604 case SpvOpDecorateString:
5605 case SpvOpMemberDecorateString:
5606 vtn_fail("Invalid opcode in types and variables section");
5607 break;
5608
5609 case SpvOpTypeVoid:
5610 case SpvOpTypeBool:
5611 case SpvOpTypeInt:
5612 case SpvOpTypeFloat:
5613 case SpvOpTypeVector:
5614 case SpvOpTypeMatrix:
5615 case SpvOpTypeImage:
5616 case SpvOpTypeSampler:
5617 case SpvOpTypeSampledImage:
5618 case SpvOpTypeArray:
5619 case SpvOpTypeRuntimeArray:
5620 case SpvOpTypeStruct:
5621 case SpvOpTypeOpaque:
5622 case SpvOpTypePointer:
5623 case SpvOpTypeForwardPointer:
5624 case SpvOpTypeFunction:
5625 case SpvOpTypeEvent:
5626 case SpvOpTypeDeviceEvent:
5627 case SpvOpTypeReserveId:
5628 case SpvOpTypeQueue:
5629 case SpvOpTypePipe:
5630 case SpvOpTypeAccelerationStructureKHR:
5631 case SpvOpTypeRayQueryKHR:
5632 case SpvOpTypeCooperativeMatrixKHR:
5633 vtn_handle_type(b, opcode, w, count);
5634 break;
5635
5636 case SpvOpConstantTrue:
5637 case SpvOpConstantFalse:
5638 case SpvOpConstant:
5639 case SpvOpConstantComposite:
5640 case SpvOpConstantCompositeReplicateEXT:
5641 case SpvOpConstantNull:
5642 case SpvOpSpecConstantTrue:
5643 case SpvOpSpecConstantFalse:
5644 case SpvOpSpecConstant:
5645 case SpvOpSpecConstantComposite:
5646 case SpvOpSpecConstantCompositeReplicateEXT:
5647 case SpvOpSpecConstantOp:
5648 vtn_handle_constant(b, opcode, w, count);
5649 break;
5650
5651 case SpvOpUndef:
5652 case SpvOpVariable:
5653 case SpvOpConstantSampler:
5654 vtn_handle_variables(b, opcode, w, count);
5655 break;
5656
5657 case SpvOpExtInst:
5658 case SpvOpExtInstWithForwardRefsKHR: {
5659 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5660 /* NonSemantic extended instructions are acceptable in the preamble;
5661  * anything else marks the end of the preamble.
5662 */
5663 return val->ext_handler == vtn_handle_non_semantic_instruction;
5664 }
5665
5666 default:
5667 return false; /* End of preamble */
5668 }
5669
5670 return true;
5671 }
5672
5673 static struct vtn_ssa_value *
5674 vtn_nir_select(struct vtn_builder *b, struct vtn_ssa_value *src0,
5675 struct vtn_ssa_value *src1, struct vtn_ssa_value *src2)
5676 {
5677 struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
5678 dest->type = src1->type;
5679
5680 if (src1->is_variable || src2->is_variable) {
5681 vtn_assert(src1->is_variable && src2->is_variable);
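      /* Values backed by a NIR variable rather than plain SSA can't be
       * selected with bcsel; instead emit an if/else that copies the chosen
       * source into a fresh local.
       */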
5682
5683 nir_variable *dest_var =
5684 nir_local_variable_create(b->nb.impl, dest->type, "var_select");
5685 nir_deref_instr *dest_deref = nir_build_deref_var(&b->nb, dest_var);
5686
5687 nir_push_if(&b->nb, src0->def);
5688 {
5689 nir_deref_instr *src1_deref = vtn_get_deref_for_ssa_value(b, src1);
5690 vtn_local_store(b, vtn_local_load(b, src1_deref, 0), dest_deref, 0);
5691 }
5692 nir_push_else(&b->nb, NULL);
5693 {
5694 nir_deref_instr *src2_deref = vtn_get_deref_for_ssa_value(b, src2);
5695 vtn_local_store(b, vtn_local_load(b, src2_deref, 0), dest_deref, 0);
5696 }
5697 nir_pop_if(&b->nb, NULL);
5698
5699 vtn_set_ssa_value_var(b, dest, dest_var);
5700 } else if (glsl_type_is_vector_or_scalar(src1->type)) {
5701 dest->def = nir_bcsel(&b->nb, src0->def, src1->def, src2->def);
5702 } else {
5703 unsigned elems = glsl_get_length(src1->type);
5704
5705 dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
5706 for (unsigned i = 0; i < elems; i++) {
5707 dest->elems[i] = vtn_nir_select(b, src0,
5708 src1->elems[i], src2->elems[i]);
5709 }
5710 }
5711
5712 return dest;
5713 }
5714
5715 static void
5716 vtn_handle_select(struct vtn_builder *b, SpvOp opcode,
5717 const uint32_t *w, unsigned count)
5718 {
5719 /* Handle OpSelect up-front here because it needs to be able to handle
5720 * pointers and not just regular vectors and scalars.
5721 */
5722 struct vtn_value *res_val = vtn_untyped_value(b, w[2]);
5723 struct vtn_value *cond_val = vtn_untyped_value(b, w[3]);
5724 struct vtn_value *obj1_val = vtn_untyped_value(b, w[4]);
5725 struct vtn_value *obj2_val = vtn_untyped_value(b, w[5]);
5726
5727 vtn_fail_if(obj1_val->type != res_val->type ||
5728 obj2_val->type != res_val->type,
5729 "Object types must match the result type in OpSelect (%%%u = %%%u ? %%%u : %%%u)", w[2], w[3], w[4], w[5]);
5730
5731 vtn_fail_if((cond_val->type->base_type != vtn_base_type_scalar &&
5732 cond_val->type->base_type != vtn_base_type_vector) ||
5733 !glsl_type_is_boolean(cond_val->type->type),
5734 "OpSelect must have either a vector of booleans or "
5735 "a boolean as Condition type");
5736
5737 vtn_fail_if(cond_val->type->base_type == vtn_base_type_vector &&
5738 (res_val->type->base_type != vtn_base_type_vector ||
5739 res_val->type->length != cond_val->type->length),
5740 "When Condition type in OpSelect is a vector, the Result "
5741 "type must be a vector of the same length");
5742
5743 switch (res_val->type->base_type) {
5744 case vtn_base_type_scalar:
5745 case vtn_base_type_vector:
5746 case vtn_base_type_matrix:
5747 case vtn_base_type_array:
5748 case vtn_base_type_struct:
5749 /* OK. */
5750 break;
5751 case vtn_base_type_pointer:
5752 /* We need to have actual storage for pointer types. */
5753 vtn_fail_if(res_val->type->type == NULL,
5754 "Invalid pointer result type for OpSelect");
5755 break;
5756 default:
5757 vtn_fail("Result type of OpSelect must be a scalar, composite, or pointer");
5758 }
5759
5760 vtn_push_ssa_value(b, w[2],
5761 vtn_nir_select(b, vtn_ssa_value(b, w[3]),
5762 vtn_ssa_value(b, w[4]),
5763 vtn_ssa_value(b, w[5])));
5764 }
5765
5766 static void
5767 vtn_handle_ptr(struct vtn_builder *b, SpvOp opcode,
5768 const uint32_t *w, unsigned count)
5769 {
5770 struct vtn_type *type1 = vtn_get_value_type(b, w[3]);
5771 struct vtn_type *type2 = vtn_get_value_type(b, w[4]);
5772 vtn_fail_if(type1->base_type != vtn_base_type_pointer ||
5773 type2->base_type != vtn_base_type_pointer,
5774 "%s operands must have pointer types",
5775 spirv_op_to_string(opcode));
5776 vtn_fail_if(type1->storage_class != type2->storage_class,
5777 "%s operands must have the same storage class",
5778 spirv_op_to_string(opcode));
5779
5780 struct vtn_type *vtn_type = vtn_get_type(b, w[1]);
5781 const struct glsl_type *type = vtn_type->type;
5782
5783 nir_address_format addr_format = vtn_mode_to_address_format(
5784 b, vtn_storage_class_to_mode(b, type1->storage_class, NULL, NULL));
5785
5786 nir_def *def;
5787
5788 switch (opcode) {
5789 case SpvOpPtrDiff: {
5790 /* OpPtrDiff returns the difference in number of elements (not byte offset). */
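      /* e.g. with float elements (elem_size = 4), an address difference of
       * 16 bytes becomes an OpPtrDiff result of 4.
       */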
5791 unsigned elem_size, elem_align;
5792 glsl_get_natural_size_align_bytes(type1->pointed->type,
5793 &elem_size, &elem_align);
5794
5795 def = nir_build_addr_isub(&b->nb,
5796 vtn_get_nir_ssa(b, w[3]),
5797 vtn_get_nir_ssa(b, w[4]),
5798 addr_format);
5799 def = nir_idiv(&b->nb, def, nir_imm_intN_t(&b->nb, elem_size, def->bit_size));
5800 def = nir_i2iN(&b->nb, def, glsl_get_bit_size(type));
5801 break;
5802 }
5803
5804 case SpvOpPtrEqual:
5805 case SpvOpPtrNotEqual: {
5806 def = nir_build_addr_ieq(&b->nb,
5807 vtn_get_nir_ssa(b, w[3]),
5808 vtn_get_nir_ssa(b, w[4]),
5809 addr_format);
5810 if (opcode == SpvOpPtrNotEqual)
5811 def = nir_inot(&b->nb, def);
5812 break;
5813 }
5814
5815 default:
5816 unreachable("Invalid ptr operation");
5817 }
5818
5819 vtn_push_nir_ssa(b, w[2], def);
5820 }
5821
5822 static void
5823 vtn_handle_ray_intrinsic(struct vtn_builder *b, SpvOp opcode,
5824 const uint32_t *w, unsigned count)
5825 {
5826 nir_intrinsic_instr *intrin;
5827
5828 switch (opcode) {
5829 case SpvOpTraceNV:
5830 case SpvOpTraceRayKHR: {
5831 intrin = nir_intrinsic_instr_create(b->nb.shader,
5832 nir_intrinsic_trace_ray);
5833
5834 /* The sources are in the same order in the NIR intrinsic */
5835 for (unsigned i = 0; i < 10; i++)
5836 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
5837
5838 nir_deref_instr *payload;
5839 if (opcode == SpvOpTraceNV)
5840 payload = vtn_get_call_payload_for_location(b, w[11]);
5841 else
5842 payload = vtn_nir_deref(b, w[11]);
5843 intrin->src[10] = nir_src_for_ssa(&payload->def);
5844 nir_builder_instr_insert(&b->nb, &intrin->instr);
5845 break;
5846 }
5847
5848 case SpvOpReportIntersectionKHR: {
5849 intrin = nir_intrinsic_instr_create(b->nb.shader,
5850 nir_intrinsic_report_ray_intersection);
5851 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
5852 intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
5853 nir_def_init(&intrin->instr, &intrin->def, 1, 1);
5854 nir_builder_instr_insert(&b->nb, &intrin->instr);
5855 vtn_push_nir_ssa(b, w[2], &intrin->def);
5856 break;
5857 }
5858
5859 case SpvOpIgnoreIntersectionNV:
5860 intrin = nir_intrinsic_instr_create(b->nb.shader,
5861 nir_intrinsic_ignore_ray_intersection);
5862 nir_builder_instr_insert(&b->nb, &intrin->instr);
5863 break;
5864
5865 case SpvOpTerminateRayNV:
5866 intrin = nir_intrinsic_instr_create(b->nb.shader,
5867 nir_intrinsic_terminate_ray);
5868 nir_builder_instr_insert(&b->nb, &intrin->instr);
5869 break;
5870
5871 case SpvOpExecuteCallableNV:
5872 case SpvOpExecuteCallableKHR: {
5873 intrin = nir_intrinsic_instr_create(b->nb.shader,
5874 nir_intrinsic_execute_callable);
5875 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
5876 nir_deref_instr *payload;
5877 if (opcode == SpvOpExecuteCallableNV)
5878 payload = vtn_get_call_payload_for_location(b, w[2]);
5879 else
5880 payload = vtn_nir_deref(b, w[2]);
5881 intrin->src[1] = nir_src_for_ssa(&payload->def);
5882 nir_builder_instr_insert(&b->nb, &intrin->instr);
5883 break;
5884 }
5885
5886 default:
5887 vtn_fail_with_opcode("Unhandled opcode", opcode);
5888 }
5889 }
5890
5891 static void
5892 vtn_handle_write_packed_primitive_indices(struct vtn_builder *b, SpvOp opcode,
5893 const uint32_t *w, unsigned count)
5894 {
5895 vtn_assert(opcode == SpvOpWritePackedPrimitiveIndices4x8NV);
5896
5897 /* TODO(mesh): Use or create a primitive that allows the unpacking to
5898 * happen in the backend. What we have here is functional but too
5899 * blunt.
5900 */
5901
5902 struct vtn_type *offset_type = vtn_get_value_type(b, w[1]);
5903 vtn_fail_if(offset_type->base_type != vtn_base_type_scalar ||
5904 offset_type->type != glsl_uint_type(),
5905 "Index Offset type of OpWritePackedPrimitiveIndices4x8NV "
5906 "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
5907
5908 struct vtn_type *packed_type = vtn_get_value_type(b, w[2]);
5909 vtn_fail_if(packed_type->base_type != vtn_base_type_scalar ||
5910 packed_type->type != glsl_uint_type(),
5911 "Packed Indices type of OpWritePackedPrimitiveIndices4x8NV "
5912 "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
5913
5914 nir_deref_instr *indices = NULL;
5915 nir_foreach_variable_with_modes(var, b->nb.shader, nir_var_shader_out) {
5916 if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) {
5917 indices = nir_build_deref_var(&b->nb, var);
5918 break;
5919 }
5920 }
5921
5922 /* It may be the case that the variable is not present in the
5923 * entry point interface list.
5924 *
5925 * See https://github.com/KhronosGroup/SPIRV-Registry/issues/104.
5926 */
5927
5928 if (!indices) {
5929 unsigned vertices_per_prim =
5930 mesa_vertices_per_prim(b->shader->info.mesh.primitive_type);
5931 unsigned max_prim_indices =
5932 vertices_per_prim * b->shader->info.mesh.max_primitives_out;
5933 const struct glsl_type *t =
5934 glsl_array_type(glsl_uint_type(), max_prim_indices, 0);
5935 nir_variable *var =
5936 nir_variable_create(b->shader, nir_var_shader_out, t,
5937 "gl_PrimitiveIndicesNV");
5938
5939 var->data.location = VARYING_SLOT_PRIMITIVE_INDICES;
5940 var->data.interpolation = INTERP_MODE_NONE;
5941 indices = nir_build_deref_var(&b->nb, var);
5942 }
5943
5944 nir_def *offset = vtn_get_nir_ssa(b, w[1]);
5945 nir_def *packed = vtn_get_nir_ssa(b, w[2]);
5946 nir_def *unpacked = nir_unpack_bits(&b->nb, packed, 8);
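   /* unpacked is a vector of 8-bit values with component 0 holding the low
    * byte, so e.g. packed == 0x03020100 writes indices 0, 1, 2, 3 at
    * offsets offset+0 .. offset+3.
    */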
5947 for (int i = 0; i < 4; i++) {
5948 nir_deref_instr *offset_deref =
5949 nir_build_deref_array(&b->nb, indices,
5950 nir_iadd_imm(&b->nb, offset, i));
5951 nir_def *val = nir_u2u32(&b->nb, nir_channel(&b->nb, unpacked, i));
5952
5953 nir_store_deref(&b->nb, offset_deref, val, 0x1);
5954 }
5955 }
5956
5957 struct ray_query_value {
5958 nir_ray_query_value nir_value;
5959 const struct glsl_type *glsl_type;
5960 };
5961
5962 static struct ray_query_value
5963 spirv_to_nir_type_ray_query_intrinsic(struct vtn_builder *b,
5964 SpvOp opcode)
5965 {
5966 switch (opcode) {
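   /* Each CASE below maps an SpvOpRayQueryGet* opcode to the matching
    * nir_ray_query_value and the GLSL type of the value it returns.
    */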
5967 #define CASE(_spv, _nir, _type) case SpvOpRayQueryGet##_spv: \
5968 return (struct ray_query_value) { .nir_value = nir_ray_query_value_##_nir, .glsl_type = _type }
5969 CASE(RayTMinKHR, tmin, glsl_floatN_t_type(32));
5970 CASE(RayFlagsKHR, flags, glsl_uint_type());
5971 CASE(WorldRayDirectionKHR, world_ray_direction, glsl_vec_type(3));
5972 CASE(WorldRayOriginKHR, world_ray_origin, glsl_vec_type(3));
5973 CASE(IntersectionTypeKHR, intersection_type, glsl_uint_type());
5974 CASE(IntersectionTKHR, intersection_t, glsl_floatN_t_type(32));
5975 CASE(IntersectionInstanceCustomIndexKHR, intersection_instance_custom_index, glsl_int_type());
5976 CASE(IntersectionInstanceIdKHR, intersection_instance_id, glsl_int_type());
5977 CASE(IntersectionInstanceShaderBindingTableRecordOffsetKHR, intersection_instance_sbt_index, glsl_uint_type());
5978 CASE(IntersectionGeometryIndexKHR, intersection_geometry_index, glsl_int_type());
5979 CASE(IntersectionPrimitiveIndexKHR, intersection_primitive_index, glsl_int_type());
5980 CASE(IntersectionBarycentricsKHR, intersection_barycentrics, glsl_vec_type(2));
5981 CASE(IntersectionFrontFaceKHR, intersection_front_face, glsl_bool_type());
5982 CASE(IntersectionCandidateAABBOpaqueKHR, intersection_candidate_aabb_opaque, glsl_bool_type());
5983 CASE(IntersectionObjectToWorldKHR, intersection_object_to_world, glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
5984 CASE(IntersectionWorldToObjectKHR, intersection_world_to_object, glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
5985 CASE(IntersectionObjectRayOriginKHR, intersection_object_ray_origin, glsl_vec_type(3));
5986 CASE(IntersectionObjectRayDirectionKHR, intersection_object_ray_direction, glsl_vec_type(3));
5987 CASE(IntersectionTriangleVertexPositionsKHR, intersection_triangle_vertex_positions, glsl_array_type(glsl_vec_type(3), 3,
5988 glsl_get_explicit_stride(glsl_vec_type(3))));
5989 #undef CASE
5990 default:
5991 vtn_fail_with_opcode("Unhandled opcode", opcode);
5992 }
5993 }
5994
5995 static void
5996 ray_query_load_intrinsic_create(struct vtn_builder *b, SpvOp opcode,
5997 const uint32_t *w, nir_def *src0,
5998 bool committed)
5999 {
6000 struct ray_query_value value =
6001 spirv_to_nir_type_ray_query_intrinsic(b, opcode);
6002
6003 if (glsl_type_is_array_or_matrix(value.glsl_type)) {
6004 const struct glsl_type *elem_type = glsl_get_array_element(value.glsl_type);
6005 const unsigned elems = glsl_get_length(value.glsl_type);
6006
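      /* Matrix and array results (e.g. the object/world transforms or the
       * triangle vertex positions) are loaded one column/element at a time
       * via the .column index below.
       */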
6007 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, value.glsl_type);
6008 for (unsigned i = 0; i < elems; i++) {
6009 ssa->elems[i]->def =
6010 nir_rq_load(&b->nb,
6011 glsl_get_vector_elements(elem_type),
6012 glsl_get_bit_size(elem_type),
6013 src0,
6014 .ray_query_value = value.nir_value,
6015 .committed = committed,
6016 .column = i);
6017 }
6018
6019 vtn_push_ssa_value(b, w[2], ssa);
6020 } else {
6021 assert(glsl_type_is_vector_or_scalar(value.glsl_type));
6022
6023 vtn_push_nir_ssa(b, w[2],
6024 nir_rq_load(&b->nb,
6025 glsl_get_vector_elements(value.glsl_type),
6026 glsl_get_bit_size(value.glsl_type),
6027 src0,
6028 .ray_query_value = value.nir_value,
6029 .committed = committed));
6030 }
6031 }
6032
6033 static void
6034 vtn_handle_ray_query_intrinsic(struct vtn_builder *b, SpvOp opcode,
6035 const uint32_t *w, unsigned count)
6036 {
6037 switch (opcode) {
6038 case SpvOpRayQueryInitializeKHR: {
6039 nir_intrinsic_instr *intrin =
6040 nir_intrinsic_instr_create(b->nb.shader,
6041 nir_intrinsic_rq_initialize);
6042 /* The sources are in the same order in the NIR intrinsic */
6043 for (unsigned i = 0; i < 8; i++)
6044 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
6045 nir_builder_instr_insert(&b->nb, &intrin->instr);
6046 break;
6047 }
6048
6049 case SpvOpRayQueryTerminateKHR:
6050 nir_rq_terminate(&b->nb, vtn_ssa_value(b, w[1])->def);
6051 break;
6052
6053 case SpvOpRayQueryProceedKHR:
6054 vtn_push_nir_ssa(b, w[2],
6055 nir_rq_proceed(&b->nb, 1, vtn_ssa_value(b, w[3])->def));
6056 break;
6057
6058 case SpvOpRayQueryGenerateIntersectionKHR:
6059 nir_rq_generate_intersection(&b->nb,
6060 vtn_ssa_value(b, w[1])->def,
6061 vtn_ssa_value(b, w[2])->def);
6062 break;
6063
6064 case SpvOpRayQueryConfirmIntersectionKHR:
6065 nir_rq_confirm_intersection(&b->nb, vtn_ssa_value(b, w[1])->def);
6066 break;
6067
6068 case SpvOpRayQueryGetIntersectionTKHR:
6069 case SpvOpRayQueryGetIntersectionTypeKHR:
6070 case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6071 case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6072 case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6073 case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6074 case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6075 case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6076 case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6077 case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6078 case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6079 case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6080 case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6081 case SpvOpRayQueryGetIntersectionTriangleVertexPositionsKHR:
6082 ray_query_load_intrinsic_create(b, opcode, w,
6083 vtn_ssa_value(b, w[3])->def,
6084 vtn_constant_uint(b, w[4]));
6085 break;
6086
6087 case SpvOpRayQueryGetRayTMinKHR:
6088 case SpvOpRayQueryGetRayFlagsKHR:
6089 case SpvOpRayQueryGetWorldRayDirectionKHR:
6090 case SpvOpRayQueryGetWorldRayOriginKHR:
6091 case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6092 ray_query_load_intrinsic_create(b, opcode, w,
6093 vtn_ssa_value(b, w[3])->def,
6094 /* Committed value is ignored for these */
6095 false);
6096 break;
6097
6098 default:
6099 vtn_fail_with_opcode("Unhandled opcode", opcode);
6100 }
6101 }
6102
6103 static void
6104 vtn_handle_allocate_node_payloads(struct vtn_builder *b, SpvOp opcode,
6105 const uint32_t *w, unsigned count)
6106 {
6107 vtn_assert(opcode == SpvOpAllocateNodePayloadsAMDX);
6108
6109 nir_def *payloads = vtn_ssa_value(b, w[1])->def;
6110 mesa_scope scope = vtn_translate_scope(b, vtn_constant_uint(b, w[2]));
6111 nir_def *payload_count = vtn_ssa_value(b, w[3])->def;
6112 nir_def *node_index = vtn_ssa_value(b, w[4])->def;
6113
6114 nir_initialize_node_payloads(&b->nb, payloads, payload_count, node_index, .execution_scope = scope);
6115 }
6116
6117 static bool
6118 vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
6119 const uint32_t *w, unsigned count)
6120 {
6121 if (b->options->debug_info) {
6122 nir_debug_info_instr *instr =
6123 nir_debug_info_instr_create(b->shader, nir_debug_info_src_loc, 0);
6124 instr->src_loc.spirv_offset = b->spirv_offset;
6125 instr->src_loc.source = nir_debug_info_spirv;
6126
6127 if (b->file) {
6128 nir_def *filename;
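         /* Build the filename string def at most once per file and cache it
          * in b->strings so every source location can reuse it.
          */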
6129 struct hash_entry *he = _mesa_hash_table_search(b->strings, b->file);
6130 if (he) {
6131 filename = he->data;
6132 } else {
6133 nir_builder _b = nir_builder_at(nir_before_cf_list(&b->nb.impl->body));
6134 filename = nir_build_string(&_b, b->file);
6135 _mesa_hash_table_insert(b->strings, b->file, filename);
6136 }
6137
6138 instr->src_loc.filename = nir_src_for_ssa(filename);
6139 /* Make sure line is at least 1 since 0 is reserved for spirv_offset-only
6140 * source locations.
6141 */
6142 instr->src_loc.line = MAX2(b->line, 1);
6143 instr->src_loc.column = b->col;
6144 }
6145
6146 nir_builder_instr_insert(&b->nb, &instr->instr);
6147 }
6148
6149 switch (opcode) {
6150 case SpvOpLabel:
6151 break;
6152
6153 case SpvOpLoopMerge:
6154 case SpvOpSelectionMerge:
6155 /* This is handled by cfg pre-pass and walk_blocks */
6156 break;
6157
6158 case SpvOpUndef: {
6159 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
6160 val->type = vtn_get_type(b, w[1]);
6161 break;
6162 }
6163
6164 case SpvOpExtInst:
6165 case SpvOpExtInstWithForwardRefsKHR:
6166 vtn_handle_extension(b, opcode, w, count);
6167 break;
6168
6169 case SpvOpVariable:
6170 case SpvOpLoad:
6171 case SpvOpStore:
6172 case SpvOpCopyMemory:
6173 case SpvOpCopyMemorySized:
6174 case SpvOpAccessChain:
6175 case SpvOpPtrAccessChain:
6176 case SpvOpInBoundsAccessChain:
6177 case SpvOpInBoundsPtrAccessChain:
6178 case SpvOpArrayLength:
6179 case SpvOpConvertPtrToU:
6180 case SpvOpConvertUToPtr:
6181 case SpvOpGenericCastToPtrExplicit:
6182 case SpvOpGenericPtrMemSemantics:
6183 case SpvOpSubgroupBlockReadINTEL:
6184 case SpvOpSubgroupBlockWriteINTEL:
6185 case SpvOpConvertUToAccelerationStructureKHR:
6186 vtn_handle_variables(b, opcode, w, count);
6187 break;
6188
6189 case SpvOpFunctionCall:
6190 vtn_handle_function_call(b, opcode, w, count);
6191 break;
6192
6193 case SpvOpSampledImage:
6194 case SpvOpImage:
6195 case SpvOpImageSparseTexelsResident:
6196 case SpvOpImageSampleImplicitLod:
6197 case SpvOpImageSparseSampleImplicitLod:
6198 case SpvOpImageSampleExplicitLod:
6199 case SpvOpImageSparseSampleExplicitLod:
6200 case SpvOpImageSampleDrefImplicitLod:
6201 case SpvOpImageSparseSampleDrefImplicitLod:
6202 case SpvOpImageSampleDrefExplicitLod:
6203 case SpvOpImageSparseSampleDrefExplicitLod:
6204 case SpvOpImageSampleProjImplicitLod:
6205 case SpvOpImageSampleProjExplicitLod:
6206 case SpvOpImageSampleProjDrefImplicitLod:
6207 case SpvOpImageSampleProjDrefExplicitLod:
6208 case SpvOpImageFetch:
6209 case SpvOpImageSparseFetch:
6210 case SpvOpImageGather:
6211 case SpvOpImageSparseGather:
6212 case SpvOpImageDrefGather:
6213 case SpvOpImageSparseDrefGather:
6214 case SpvOpImageQueryLod:
6215 vtn_handle_texture(b, opcode, w, count);
6216 break;
6217
6218 case SpvOpImageRead:
6219 case SpvOpImageSparseRead:
6220 case SpvOpImageWrite:
6221 case SpvOpImageTexelPointer:
6222 case SpvOpImageQueryFormat:
6223 case SpvOpImageQueryOrder:
6224 vtn_handle_image(b, opcode, w, count);
6225 break;
6226
6227 case SpvOpImageQueryLevels:
6228 case SpvOpImageQuerySamples:
6229 case SpvOpImageQuerySizeLod:
6230 case SpvOpImageQuerySize: {
6231 struct vtn_type *image_type = vtn_get_value_type(b, w[3]);
6232 vtn_assert(image_type->base_type == vtn_base_type_image);
6233 if (glsl_type_is_image(image_type->glsl_image)) {
6234 vtn_handle_image(b, opcode, w, count);
6235 } else {
6236 vtn_assert(glsl_type_is_texture(image_type->glsl_image));
6237 vtn_handle_texture(b, opcode, w, count);
6238 }
6239 break;
6240 }
6241
6242 case SpvOpFragmentMaskFetchAMD:
6243 case SpvOpFragmentFetchAMD:
6244 vtn_handle_texture(b, opcode, w, count);
6245 break;
6246
6247 case SpvOpAtomicLoad:
6248 case SpvOpAtomicExchange:
6249 case SpvOpAtomicCompareExchange:
6250 case SpvOpAtomicCompareExchangeWeak:
6251 case SpvOpAtomicIIncrement:
6252 case SpvOpAtomicIDecrement:
6253 case SpvOpAtomicIAdd:
6254 case SpvOpAtomicISub:
6255 case SpvOpAtomicSMin:
6256 case SpvOpAtomicUMin:
6257 case SpvOpAtomicSMax:
6258 case SpvOpAtomicUMax:
6259 case SpvOpAtomicAnd:
6260 case SpvOpAtomicOr:
6261 case SpvOpAtomicXor:
6262 case SpvOpAtomicFAddEXT:
6263 case SpvOpAtomicFMinEXT:
6264 case SpvOpAtomicFMaxEXT:
6265 case SpvOpAtomicFlagTestAndSet: {
6266 struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
6267 if (pointer->value_type == vtn_value_type_image_pointer) {
6268 vtn_handle_image(b, opcode, w, count);
6269 } else {
6270 vtn_assert(pointer->value_type == vtn_value_type_pointer);
6271 vtn_handle_atomics(b, opcode, w, count);
6272 }
6273 break;
6274 }
6275
6276 case SpvOpAtomicStore:
6277 case SpvOpAtomicFlagClear: {
6278 struct vtn_value *pointer = vtn_untyped_value(b, w[1]);
6279 if (pointer->value_type == vtn_value_type_image_pointer) {
6280 vtn_handle_image(b, opcode, w, count);
6281 } else {
6282 vtn_assert(pointer->value_type == vtn_value_type_pointer);
6283 vtn_handle_atomics(b, opcode, w, count);
6284 }
6285 break;
6286 }
6287
6288 case SpvOpSelect:
6289 vtn_handle_select(b, opcode, w, count);
6290 break;
6291
6292 case SpvOpSNegate:
6293 case SpvOpFNegate:
6294 case SpvOpNot:
6295 case SpvOpAny:
6296 case SpvOpAll:
6297 case SpvOpConvertFToU:
6298 case SpvOpConvertFToS:
6299 case SpvOpConvertSToF:
6300 case SpvOpConvertUToF:
6301 case SpvOpUConvert:
6302 case SpvOpSConvert:
6303 case SpvOpFConvert:
6304 case SpvOpQuantizeToF16:
6305 case SpvOpSatConvertSToU:
6306 case SpvOpSatConvertUToS:
6307 case SpvOpPtrCastToGeneric:
6308 case SpvOpGenericCastToPtr:
6309 case SpvOpIsNan:
6310 case SpvOpIsInf:
6311 case SpvOpIsFinite:
6312 case SpvOpIsNormal:
6313 case SpvOpSignBitSet:
6314 case SpvOpLessOrGreater:
6315 case SpvOpOrdered:
6316 case SpvOpUnordered:
6317 case SpvOpIAdd:
6318 case SpvOpFAdd:
6319 case SpvOpISub:
6320 case SpvOpFSub:
6321 case SpvOpIMul:
6322 case SpvOpFMul:
6323 case SpvOpUDiv:
6324 case SpvOpSDiv:
6325 case SpvOpFDiv:
6326 case SpvOpUMod:
6327 case SpvOpSRem:
6328 case SpvOpSMod:
6329 case SpvOpFRem:
6330 case SpvOpFMod:
6331 case SpvOpVectorTimesScalar:
6332 case SpvOpDot:
6333 case SpvOpIAddCarry:
6334 case SpvOpISubBorrow:
6335 case SpvOpUMulExtended:
6336 case SpvOpSMulExtended:
6337 case SpvOpShiftRightLogical:
6338 case SpvOpShiftRightArithmetic:
6339 case SpvOpShiftLeftLogical:
6340 case SpvOpLogicalEqual:
6341 case SpvOpLogicalNotEqual:
6342 case SpvOpLogicalOr:
6343 case SpvOpLogicalAnd:
6344 case SpvOpLogicalNot:
6345 case SpvOpBitwiseOr:
6346 case SpvOpBitwiseXor:
6347 case SpvOpBitwiseAnd:
6348 case SpvOpIEqual:
6349 case SpvOpFOrdEqual:
6350 case SpvOpFUnordEqual:
6351 case SpvOpINotEqual:
6352 case SpvOpFOrdNotEqual:
6353 case SpvOpFUnordNotEqual:
6354 case SpvOpULessThan:
6355 case SpvOpSLessThan:
6356 case SpvOpFOrdLessThan:
6357 case SpvOpFUnordLessThan:
6358 case SpvOpUGreaterThan:
6359 case SpvOpSGreaterThan:
6360 case SpvOpFOrdGreaterThan:
6361 case SpvOpFUnordGreaterThan:
6362 case SpvOpULessThanEqual:
6363 case SpvOpSLessThanEqual:
6364 case SpvOpFOrdLessThanEqual:
6365 case SpvOpFUnordLessThanEqual:
6366 case SpvOpUGreaterThanEqual:
6367 case SpvOpSGreaterThanEqual:
6368 case SpvOpFOrdGreaterThanEqual:
6369 case SpvOpFUnordGreaterThanEqual:
6370 case SpvOpDPdx:
6371 case SpvOpDPdy:
6372 case SpvOpFwidth:
6373 case SpvOpDPdxFine:
6374 case SpvOpDPdyFine:
6375 case SpvOpFwidthFine:
6376 case SpvOpDPdxCoarse:
6377 case SpvOpDPdyCoarse:
6378 case SpvOpFwidthCoarse:
6379 case SpvOpBitFieldInsert:
6380 case SpvOpBitFieldSExtract:
6381 case SpvOpBitFieldUExtract:
6382 case SpvOpBitReverse:
6383 case SpvOpBitCount:
6384 case SpvOpTranspose:
6385 case SpvOpOuterProduct:
6386 case SpvOpMatrixTimesScalar:
6387 case SpvOpVectorTimesMatrix:
6388 case SpvOpMatrixTimesVector:
6389 case SpvOpMatrixTimesMatrix:
6390 case SpvOpUCountLeadingZerosINTEL:
6391 case SpvOpUCountTrailingZerosINTEL:
6392 case SpvOpAbsISubINTEL:
6393 case SpvOpAbsUSubINTEL:
6394 case SpvOpIAddSatINTEL:
6395 case SpvOpUAddSatINTEL:
6396 case SpvOpIAverageINTEL:
6397 case SpvOpUAverageINTEL:
6398 case SpvOpIAverageRoundedINTEL:
6399 case SpvOpUAverageRoundedINTEL:
6400 case SpvOpISubSatINTEL:
6401 case SpvOpUSubSatINTEL:
6402 case SpvOpIMul32x16INTEL:
6403 case SpvOpUMul32x16INTEL:
6404 vtn_handle_alu(b, opcode, w, count);
6405 break;
6406
6407 case SpvOpSDotKHR:
6408 case SpvOpUDotKHR:
6409 case SpvOpSUDotKHR:
6410 case SpvOpSDotAccSatKHR:
6411 case SpvOpUDotAccSatKHR:
6412 case SpvOpSUDotAccSatKHR:
6413 vtn_handle_integer_dot(b, opcode, w, count);
6414 break;
6415
6416 case SpvOpBitcast:
6417 vtn_handle_bitcast(b, w, count);
6418 break;
6419
6420 /* TODO: One day, we should probably do something with this information.
6421 * For now, though, it's safe to implement it as a no-op.
6422 * Needed for Rusticl SYCL support.
6423 */
6424 case SpvOpAssumeTrueKHR:
6425 break;
6426
6427 case SpvOpExpectKHR:
6428 case SpvOpVectorExtractDynamic:
6429 case SpvOpVectorInsertDynamic:
6430 case SpvOpVectorShuffle:
6431 case SpvOpCompositeConstruct:
6432 case SpvOpCompositeConstructReplicateEXT:
6433 case SpvOpCompositeExtract:
6434 case SpvOpCompositeInsert:
6435 case SpvOpCopyLogical:
6436 case SpvOpCopyObject:
6437 vtn_handle_composite(b, opcode, w, count);
6438 break;
6439
6440 case SpvOpEmitVertex:
6441 case SpvOpEndPrimitive:
6442 case SpvOpEmitStreamVertex:
6443 case SpvOpEndStreamPrimitive:
6444 case SpvOpControlBarrier:
6445 case SpvOpMemoryBarrier:
6446 vtn_handle_barrier(b, opcode, w, count);
6447 break;
6448
6449 case SpvOpGroupNonUniformElect:
6450 case SpvOpGroupNonUniformAll:
6451 case SpvOpGroupNonUniformAny:
6452 case SpvOpGroupNonUniformAllEqual:
6453 case SpvOpGroupNonUniformBroadcast:
6454 case SpvOpGroupNonUniformBroadcastFirst:
6455 case SpvOpGroupNonUniformBallot:
6456 case SpvOpGroupNonUniformInverseBallot:
6457 case SpvOpGroupNonUniformBallotBitExtract:
6458 case SpvOpGroupNonUniformBallotBitCount:
6459 case SpvOpGroupNonUniformBallotFindLSB:
6460 case SpvOpGroupNonUniformBallotFindMSB:
6461 case SpvOpGroupNonUniformShuffle:
6462 case SpvOpGroupNonUniformShuffleXor:
6463 case SpvOpGroupNonUniformShuffleUp:
6464 case SpvOpGroupNonUniformShuffleDown:
6465 case SpvOpGroupNonUniformIAdd:
6466 case SpvOpGroupNonUniformFAdd:
6467 case SpvOpGroupNonUniformIMul:
6468 case SpvOpGroupNonUniformFMul:
6469 case SpvOpGroupNonUniformSMin:
6470 case SpvOpGroupNonUniformUMin:
6471 case SpvOpGroupNonUniformFMin:
6472 case SpvOpGroupNonUniformSMax:
6473 case SpvOpGroupNonUniformUMax:
6474 case SpvOpGroupNonUniformFMax:
6475 case SpvOpGroupNonUniformBitwiseAnd:
6476 case SpvOpGroupNonUniformBitwiseOr:
6477 case SpvOpGroupNonUniformBitwiseXor:
6478 case SpvOpGroupNonUniformLogicalAnd:
6479 case SpvOpGroupNonUniformLogicalOr:
6480 case SpvOpGroupNonUniformLogicalXor:
6481 case SpvOpGroupNonUniformQuadBroadcast:
6482 case SpvOpGroupNonUniformQuadSwap:
6483 case SpvOpGroupNonUniformQuadAllKHR:
6484 case SpvOpGroupNonUniformQuadAnyKHR:
6485 case SpvOpGroupAll:
6486 case SpvOpGroupAny:
6487 case SpvOpGroupBroadcast:
6488 case SpvOpGroupIAdd:
6489 case SpvOpGroupFAdd:
6490 case SpvOpGroupFMin:
6491 case SpvOpGroupUMin:
6492 case SpvOpGroupSMin:
6493 case SpvOpGroupFMax:
6494 case SpvOpGroupUMax:
6495 case SpvOpGroupSMax:
6496 case SpvOpSubgroupBallotKHR:
6497 case SpvOpSubgroupFirstInvocationKHR:
6498 case SpvOpSubgroupReadInvocationKHR:
6499 case SpvOpSubgroupAllKHR:
6500 case SpvOpSubgroupAnyKHR:
6501 case SpvOpSubgroupAllEqualKHR:
6502 case SpvOpGroupIAddNonUniformAMD:
6503 case SpvOpGroupFAddNonUniformAMD:
6504 case SpvOpGroupFMinNonUniformAMD:
6505 case SpvOpGroupUMinNonUniformAMD:
6506 case SpvOpGroupSMinNonUniformAMD:
6507 case SpvOpGroupFMaxNonUniformAMD:
6508 case SpvOpGroupUMaxNonUniformAMD:
6509 case SpvOpGroupSMaxNonUniformAMD:
6510 case SpvOpSubgroupShuffleINTEL:
6511 case SpvOpSubgroupShuffleDownINTEL:
6512 case SpvOpSubgroupShuffleUpINTEL:
6513 case SpvOpSubgroupShuffleXorINTEL:
6514 case SpvOpGroupNonUniformRotateKHR:
6515 vtn_handle_subgroup(b, opcode, w, count);
6516 break;
6517
6518 case SpvOpPtrDiff:
6519 case SpvOpPtrEqual:
6520 case SpvOpPtrNotEqual:
6521 vtn_handle_ptr(b, opcode, w, count);
6522 break;
6523
6524 case SpvOpBeginInvocationInterlockEXT:
6525 nir_begin_invocation_interlock(&b->nb);
6526 break;
6527
6528 case SpvOpEndInvocationInterlockEXT:
6529 nir_end_invocation_interlock(&b->nb);
6530 break;
6531
6532 case SpvOpDemoteToHelperInvocation: {
6533 nir_demote(&b->nb);
6534 break;
6535 }
6536
6537 case SpvOpIsHelperInvocationEXT: {
6538 vtn_push_nir_ssa(b, w[2], nir_is_helper_invocation(&b->nb, 1));
6539 break;
6540 }
6541
6542 case SpvOpReadClockKHR: {
6543 SpvScope scope = vtn_constant_uint(b, w[3]);
6544 vtn_fail_if(scope != SpvScopeDevice && scope != SpvScopeSubgroup,
6545 "OpReadClockKHR Scope must be either "
6546 "ScopeDevice or ScopeSubgroup.");
6547
6548 /* Operation supports two result types: uvec2 and uint64_t. The NIR
6549 * intrinsic gives uvec2, so pack the result for the other case.
6550 */
6551 nir_def *result = nir_shader_clock(&b->nb, vtn_translate_scope(b, scope));
6552
6553 struct vtn_type *type = vtn_get_type(b, w[1]);
6554 const struct glsl_type *dest_type = type->type;
6555
6556 if (glsl_type_is_vector(dest_type)) {
6557 assert(dest_type == glsl_vector_type(GLSL_TYPE_UINT, 2));
6558 } else {
6559 assert(glsl_type_is_scalar(dest_type));
6560 assert(glsl_get_base_type(dest_type) == GLSL_TYPE_UINT64);
6561 result = nir_pack_64_2x32(&b->nb, result);
6562 }
6563
6564 vtn_push_nir_ssa(b, w[2], result);
6565 break;
6566 }
6567
6568 case SpvOpTraceNV:
6569 case SpvOpTraceRayKHR:
6570 case SpvOpReportIntersectionKHR:
6571 case SpvOpIgnoreIntersectionNV:
6572 case SpvOpTerminateRayNV:
6573 case SpvOpExecuteCallableNV:
6574 case SpvOpExecuteCallableKHR:
6575 vtn_handle_ray_intrinsic(b, opcode, w, count);
6576 break;
6577
6578 case SpvOpRayQueryInitializeKHR:
6579 case SpvOpRayQueryTerminateKHR:
6580 case SpvOpRayQueryGenerateIntersectionKHR:
6581 case SpvOpRayQueryConfirmIntersectionKHR:
6582 case SpvOpRayQueryProceedKHR:
6583 case SpvOpRayQueryGetIntersectionTypeKHR:
6584 case SpvOpRayQueryGetRayTMinKHR:
6585 case SpvOpRayQueryGetRayFlagsKHR:
6586 case SpvOpRayQueryGetIntersectionTKHR:
6587 case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6588 case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6589 case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6590 case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6591 case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6592 case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6593 case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6594 case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6595 case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6596 case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6597 case SpvOpRayQueryGetWorldRayDirectionKHR:
6598 case SpvOpRayQueryGetWorldRayOriginKHR:
6599 case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6600 case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6601 case SpvOpRayQueryGetIntersectionTriangleVertexPositionsKHR:
6602 vtn_handle_ray_query_intrinsic(b, opcode, w, count);
6603 break;
6604
6605 case SpvOpLifetimeStart:
6606 case SpvOpLifetimeStop:
6607 break;
6608
6609 case SpvOpGroupAsyncCopy:
6610 case SpvOpGroupWaitEvents:
6611 vtn_handle_opencl_core_instruction(b, opcode, w, count);
6612 break;
6613
6614 case SpvOpWritePackedPrimitiveIndices4x8NV:
6615 vtn_handle_write_packed_primitive_indices(b, opcode, w, count);
6616 break;
6617
6618 case SpvOpSetMeshOutputsEXT:
6619 nir_set_vertex_and_primitive_count(
6620 &b->nb, vtn_get_nir_ssa(b, w[1]), vtn_get_nir_ssa(b, w[2]),
6621 nir_undef(&b->nb, 1, 32));
6622 break;
6623
6624 case SpvOpAllocateNodePayloadsAMDX:
6625 vtn_handle_allocate_node_payloads(b, opcode, w, count);
6626 break;
6627
6628 case SpvOpFinishWritingNodePayloadAMDX:
6629 break;
6630
6631 case SpvOpCooperativeMatrixLoadKHR:
6632 case SpvOpCooperativeMatrixStoreKHR:
6633 case SpvOpCooperativeMatrixLengthKHR:
6634 case SpvOpCooperativeMatrixMulAddKHR:
6635 vtn_handle_cooperative_instruction(b, opcode, w, count);
6636 break;
6637
6638 default:
6639 vtn_fail_with_opcode("Unhandled opcode", opcode);
6640 }
6641
6642 return true;
6643 }
6644
6645 static bool
6646 is_glslang(const struct vtn_builder *b)
6647 {
6648 return b->generator_id == vtn_generator_glslang_reference_front_end ||
6649 b->generator_id == vtn_generator_shaderc_over_glslang;
6650 }
6651
6652 struct vtn_builder*
6653 vtn_create_builder(const uint32_t *words, size_t word_count,
6654 gl_shader_stage stage, const char *entry_point_name,
6655 const struct spirv_to_nir_options *options)
6656 {
6657 /* Initialize the vtn_builder object */
6658 struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
6659
6660 b->spirv = words;
6661 b->spirv_word_count = word_count;
6662 b->file = NULL;
6663 b->line = -1;
6664 b->col = -1;
6665 list_inithead(&b->functions);
6666 b->entry_point_stage = stage;
6667 b->entry_point_name = entry_point_name;
6668
6669 /*
6670 * Handle the SPIR-V header (first 5 dwords).
6671 * Can't use vtn_assert() as the setjmp(3) target isn't initialized yet.
6672 */
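/* Per the SPIR-V specification, the header words are: [0] the magic number,
 * [1] the version, [2] the generator magic (tool ID in the high 16 bits,
 * tool version in the low 16 bits), [3] the ID bound, and [4] the reserved
 * schema, which must be zero.
 */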
6673 if (word_count <= 5)
6674 goto fail;
6675
6676 if (words[0] != SpvMagicNumber) {
6677 vtn_err("words[0] was 0x%x, want 0x%x", words[0], SpvMagicNumber);
6678 goto fail;
6679 }
6680
6681 b->version = words[1];
6682 if (b->version < 0x10000) {
6683 vtn_err("version was 0x%x, want >= 0x10000", b->version);
6684 goto fail;
6685 }
6686
6687 b->generator_id = words[2] >> 16;
6688 uint16_t generator_version = words[2];
6689
6690 unsigned value_id_bound = words[3];
6691 if (words[4] != 0) {
6692 vtn_err("words[4] was %u, want 0", words[4]);
6693 goto fail;
6694 }
6695
6696 b->value_id_bound = value_id_bound;
6697
6698 /* Allocate all the data that can be dropped after parsing using
6699 * a cheaper allocation strategy. Use the value_id_bound and the
6700 * size of the common internal structs to approximate a good
6701 * buffer_size.
6702 */
6703 const linear_opts lin_opts = {
6704 .min_buffer_size = 2 * value_id_bound * (sizeof(struct vtn_value) +
6705 sizeof(struct vtn_ssa_value)),
6706 };
6707 b->lin_ctx = linear_context_with_opts(b, &lin_opts);
6708
6709 struct spirv_to_nir_options *dup_options =
6710 vtn_alloc(b, struct spirv_to_nir_options);
6711 *dup_options = *options;
6712
6713 b->options = dup_options;
6714 b->values = vtn_zalloc_array(b, struct vtn_value, value_id_bound);
6715
6716 if (b->options->capabilities != NULL)
6717 b->supported_capabilities = *b->options->capabilities;
6718 else
6719 b->supported_capabilities = implemented_capabilities;
6720
6721 spirv_capabilities_set(&b->supported_capabilities, SpvCapabilityLinkage,
6722 b->options->create_library);
6723
6724 /* In GLSLang commit 8297936dd6eb3, their handling of barrier() was fixed
6725 * to provide correct memory semantics on compute shader barrier()
6726 * commands. Prior to that, we need to fix them up ourselves. This
6727 * GLSLang fix caused them to bump to generator version 3.
6728 */
6729 b->wa_glslang_cs_barrier = is_glslang(b) && generator_version < 3;
6730
6731 /* Identifying the LLVM-SPIRV translator:
6732 *
6733 * The LLVM-SPIRV translator currently doesn't store any generator ID [1].
6734 * Our use case involving the SPIRV-Tools linker also means we want to
6735 * check for that tool instead. Finally, the SPIRV-Tools linker also stores
6736 * its generator ID in the wrong location [2].
6737 *
6738 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/1223
6739 * [2] : https://github.com/KhronosGroup/SPIRV-Tools/pull/4549
6740 */
6741 const bool is_llvm_spirv_translator =
6742 (b->generator_id == 0 &&
6743 generator_version == vtn_generator_spirv_tools_linker) ||
6744 b->generator_id == vtn_generator_spirv_tools_linker;
6745
6746 /* The LLVM-SPIRV translator generates Undef initializers for _local
6747 * variables [1].
6748 *
6749 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1224
6750 */
6751 b->wa_llvm_spirv_ignore_workgroup_initializer =
6752 b->options->environment == NIR_SPIRV_OPENCL && is_llvm_spirv_translator;
6753
6754 /* Older versions of GLSLang would incorrectly emit OpReturn after
6755 * OpEmitMeshTasksEXT, even though the latter is already a terminator
6756 * instruction.
6757 *
6758 * See https://github.com/KhronosGroup/glslang/issues/3020 for details.
6759 *
6760 * Clay Shader Compiler (used by GravityMark) is also affected.
6761 */
6762 b->wa_ignore_return_after_emit_mesh_tasks =
6763 (is_glslang(b) && generator_version < 11) ||
6764 (b->generator_id == vtn_generator_clay_shader_compiler &&
6765 generator_version < 18);
6766
6767 if (b->options->environment == NIR_SPIRV_VULKAN && b->version < 0x10400)
6768 b->vars_used_indirectly = _mesa_pointer_set_create(b);
6769
6770 if (b->options->debug_info)
6771 b->strings = _mesa_pointer_hash_table_create(b);
6772
6773 return b;
6774 fail:
6775 ralloc_free(b);
6776 return NULL;
6777 }
6778
6779 /* See glsl_type_add_to_function_params and vtn_ssa_value_add_to_call_params */
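/* Aggregate (struct or array) parameters are flattened into one call
 * argument per leaf vector or scalar, in declaration order, mirroring how
 * the callee's NIR parameters were created.
 */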
6780 static void
6781 vtn_emit_kernel_entry_point_wrapper_struct_param(struct nir_builder *b,
6782 nir_deref_instr *deref,
6783 nir_call_instr *call,
6784 unsigned *idx)
6785 {
6786 if (glsl_type_is_vector_or_scalar(deref->type)) {
6787 call->params[(*idx)++] = nir_src_for_ssa(nir_load_deref(b, deref));
6788 } else {
6789 unsigned elems = glsl_get_length(deref->type);
6790 for (unsigned i = 0; i < elems; i++) {
6791 nir_deref_instr *child_deref = glsl_type_is_struct(deref->type)
6792 ? nir_build_deref_struct(b, deref, i)
6793 : nir_build_deref_array_imm(b, deref, i);
6794 vtn_emit_kernel_entry_point_wrapper_struct_param(b, child_deref, call,
6795 idx);
6796 }
6797 }
6798 }
6799
6800 static nir_function *
6801 vtn_emit_kernel_entry_point_wrapper(struct vtn_builder *b,
6802 nir_function *entry_point)
6803 {
6804 vtn_assert(entry_point == b->entry_point->func->nir_func);
6805 vtn_fail_if(!entry_point->name, "entry points are required to have a name");
6806 const char *func_name =
6807 ralloc_asprintf(b->shader, "__wrapped_%s", entry_point->name);
6808
6809 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
6810
6811 nir_function *main_entry_point = nir_function_create(b->shader, func_name);
6812 nir_function_impl *impl = nir_function_impl_create(main_entry_point);
6813 b->nb = nir_builder_at(nir_after_impl(impl));
6814 b->func_param_idx = 0;
6815
6816 nir_call_instr *call = nir_call_instr_create(b->nb.shader, entry_point);
6817
6818 unsigned call_idx = 0;
6819 for (unsigned i = 0; i < b->entry_point->func->type->length; ++i) {
6820 struct vtn_type *param_type = b->entry_point->func->type->params[i];
6821
6822 b->shader->info.cs.has_variable_shared_mem |=
6823 param_type->storage_class == SpvStorageClassWorkgroup;
6824
6825 /* consider all pointers to function memory to be parameters passed
6826 * by value
6827 */
6828 bool is_by_val = param_type->base_type == vtn_base_type_pointer &&
6829 param_type->storage_class == SpvStorageClassFunction;
6830
6831 /* input variable */
6832 nir_variable *in_var = rzalloc(b->nb.shader, nir_variable);
6833
6834 if (is_by_val) {
6835 in_var->data.mode = nir_var_uniform;
6836 in_var->type = param_type->pointed->type;
6837 } else if (param_type->base_type == vtn_base_type_image) {
6838 in_var->data.mode = nir_var_image;
6839 in_var->type = param_type->glsl_image;
6840 in_var->data.access =
6841 spirv_to_gl_access_qualifier(b, param_type->access_qualifier);
6842 } else if (param_type->base_type == vtn_base_type_sampler) {
6843 in_var->data.mode = nir_var_uniform;
6844 in_var->type = glsl_bare_sampler_type();
6845 } else {
6846 in_var->data.mode = nir_var_uniform;
6847 in_var->type = param_type->type;
6848 }
6849
6850 in_var->data.read_only = true;
6851 in_var->data.location = i;
6852
6853 nir_shader_add_variable(b->nb.shader, in_var);
6854
6855 /* we have to copy the entire variable into function memory */
6856 if (is_by_val) {
6857 nir_variable *copy_var =
6858 nir_local_variable_create(impl, in_var->type, "copy_in");
6859 nir_copy_var(&b->nb, copy_var, in_var);
6860 call->params[call_idx++] =
6861 nir_src_for_ssa(&nir_build_deref_var(&b->nb, copy_var)->def);
6862 } else if (param_type->base_type == vtn_base_type_image ||
6863 param_type->base_type == vtn_base_type_sampler) {
6864 /* Don't load the var, just pass a deref of it */
6865 call->params[call_idx++] =
6866 nir_src_for_ssa(&nir_build_deref_var(&b->nb, in_var)->def);
6867 } else if (param_type->base_type == vtn_base_type_struct) {
6868 /* We decompose struct and array parameters in vtn, so we'll need to
6869 * handle it here explicitly.
6870 * We have to keep the arguments on the actual entry point intact,
6871 * because the runtimes rely on it to match the SPIR-V.
6872 */
6873 nir_deref_instr *deref = nir_build_deref_var(&b->nb, in_var);
6874 vtn_emit_kernel_entry_point_wrapper_struct_param(&b->nb, deref, call,
6875 &call_idx);
6876 } else {
6877 call->params[call_idx++] =
6878 nir_src_for_ssa(nir_load_var(&b->nb, in_var));
6879 }
6880 }
6881
6882 assert(call_idx == entry_point->num_params);
6883
6884 nir_builder_instr_insert(&b->nb, &call->instr);
6885
6886 return main_entry_point;
6887 }
6888
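/* Callback for nir_remove_dead_variables(): a variable may only be removed
 * if it is not part of the entry point's interface set collected in
 * b->vars_used_indirectly.
 */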
6889 static bool
6890 can_remove(nir_variable *var, void *data)
6891 {
6892 const struct set *vars_used_indirectly = data;
6893 return !_mesa_set_search(vars_used_indirectly, var);
6894 }
6895
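/* Rough usage sketch. The option setup is driver-specific and the fields and
 * variables shown here are only illustrative, not exhaustive:
 *
 *    const struct spirv_to_nir_options spirv_opts = {
 *       .environment = NIR_SPIRV_VULKAN,
 *       .capabilities = &supported_caps,
 *    };
 *    nir_shader *nir = spirv_to_nir(words, word_count, NULL, 0,
 *                                   MESA_SHADER_FRAGMENT, "main",
 *                                   &spirv_opts, nir_options);
 *
 * A NULL return indicates a parse or validation failure.
 */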
6896 nir_shader *
6897 spirv_to_nir(const uint32_t *words, size_t word_count,
6898 struct nir_spirv_specialization *spec, unsigned num_spec,
6899 gl_shader_stage stage, const char *entry_point_name,
6900 const struct spirv_to_nir_options *options,
6901 const nir_shader_compiler_options *nir_options)
6902
6903 {
6904 mesa_spirv_debug_init();
6905
6906 if (MESA_SPIRV_DEBUG(ASM))
6907 spirv_print_asm(stderr, words, word_count);
6908
6909 const uint32_t *word_end = words + word_count;
6910
6911 struct vtn_builder *b = vtn_create_builder(words, word_count,
6912 stage, entry_point_name,
6913 options);
6914
6915 if (b == NULL)
6916 return NULL;
6917
6918 /* See also _vtn_fail() */
6919 if (vtn_setjmp(b->fail_jump)) {
6920 ralloc_free(b);
6921 return NULL;
6922 }
6923
6924 const char *dump_path = secure_getenv("MESA_SPIRV_DUMP_PATH");
6925 if (dump_path)
6926 vtn_dump_shader(b, dump_path, "spirv");
6927
6928 b->shader = nir_shader_create(b, stage, nir_options, NULL);
6929 b->shader->info.subgroup_size = options->subgroup_size;
6930 b->shader->info.float_controls_execution_mode = options->float_controls_execution_mode;
6931 b->shader->info.cs.shader_index = options->shader_index;
6932 _mesa_blake3_compute(words, word_count * sizeof(uint32_t), b->shader->info.source_blake3);
6933
6934 /* Skip the SPIR-V header, handled at vtn_create_builder */
6935 words += 5;
6936
6937 /* Handle all the preamble instructions */
6938 words = vtn_foreach_instruction(b, words, word_end,
6939 vtn_handle_preamble_instruction);
6940
6941 if (b->shader->info.subgroup_size == SUBGROUP_SIZE_UNIFORM &&
6942 b->enabled_capabilities.GroupNonUniform)
6943 b->shader->info.subgroup_size = SUBGROUP_SIZE_API_CONSTANT;
6944
6945 /* DirectXShaderCompiler and glslang/shaderc both create OpKill from HLSL's
6946 * discard/clip, which uses demote semantics. DirectXShaderCompiler will use
6947 * demote if the extension is enabled, so we disable this workaround in that
6948 * case.
6949 *
6950 * Related glslang issue: https://github.com/KhronosGroup/glslang/issues/2416
6951 */
6952 bool dxsc = b->generator_id == vtn_generator_spiregg;
6953 b->convert_discard_to_demote = (nir_options->discard_is_demote ||
6954 (dxsc && !b->enabled_capabilities.DemoteToHelperInvocation) ||
6955 (is_glslang(b) && b->source_lang == SpvSourceLanguageHLSL)) &&
6956 b->supported_capabilities.DemoteToHelperInvocation;
6957
6958 if (!options->create_library && b->entry_point == NULL) {
6959 vtn_fail("Entry point not found for %s shader \"%s\"",
6960 _mesa_shader_stage_to_string(stage), entry_point_name);
6961 ralloc_free(b);
6962 return NULL;
6963 }
6964
6965 /* Ensure a sane address mode is being used for function temps */
6966 assert(nir_address_format_bit_size(b->options->temp_addr_format) == nir_get_ptr_bitsize(b->shader));
6967 assert(nir_address_format_num_components(b->options->temp_addr_format) == 1);
6968
6969 /* Set shader info defaults */
6970 if (stage == MESA_SHADER_GEOMETRY)
6971 b->shader->info.gs.invocations = 1;
6972
6973 /* Parse execution modes. */
6974 if (!options->create_library)
6975 vtn_foreach_execution_mode(b, b->entry_point,
6976 vtn_handle_execution_mode, NULL);
6977
6978 b->specializations = spec;
6979 b->num_specializations = num_spec;
6980
6981 /* Handle all variable, type, and constant instructions */
6982 words = vtn_foreach_instruction(b, words, word_end,
6983 vtn_handle_variable_or_type_instruction);
6984
6985 /* Parse execution modes that depend on IDs. Must happen after we have
6986 * constants parsed.
6987 */
6988 if (!options->create_library)
6989 vtn_foreach_execution_mode(b, b->entry_point,
6990 vtn_handle_execution_mode_id, NULL);
6991
6992 if (b->workgroup_size_builtin) {
6993 vtn_assert(gl_shader_stage_uses_workgroup(stage));
6994 vtn_assert(b->workgroup_size_builtin->type->type ==
6995 glsl_vector_type(GLSL_TYPE_UINT, 3));
6996
6997 nir_const_value *const_size =
6998 b->workgroup_size_builtin->constant->values;
6999
7000 b->shader->info.workgroup_size[0] = const_size[0].u32;
7001 b->shader->info.workgroup_size[1] = const_size[1].u32;
7002 b->shader->info.workgroup_size[2] = const_size[2].u32;
7003 }
7004
7005 /* Set types on all vtn_values */
7006 vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);
7007
7008 vtn_build_cfg(b, words, word_end);
7009
7010 if (!options->create_library) {
7011 assert(b->entry_point->value_type == vtn_value_type_function);
7012 b->entry_point->func->referenced = true;
7013 }
7014
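/* Emit function bodies on demand: emitting one body may mark additional
 * functions as referenced (via OpFunctionCall), so iterate until no new
 * functions are emitted.
 */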
7015 bool progress;
7016 do {
7017 progress = false;
7018 vtn_foreach_function(func, &b->functions) {
7019 if ((options->create_library || func->referenced) && !func->emitted) {
7020 _mesa_hash_table_clear(b->strings, NULL);
7021 vtn_function_emit(b, func, vtn_handle_body_instruction);
7022 progress = true;
7023 }
7024 }
7025 } while (progress);
7026
7027 if (!options->create_library) {
7028 vtn_assert(b->entry_point->value_type == vtn_value_type_function);
7029 nir_function *entry_point = b->entry_point->func->nir_func;
7030 vtn_assert(entry_point);
7031
7032 entry_point->dont_inline = false;
7033 /* post process entry_points with input params */
7034 if (entry_point->num_params && b->shader->info.stage == MESA_SHADER_KERNEL)
7035 entry_point = vtn_emit_kernel_entry_point_wrapper(b, entry_point);
7036
7037 entry_point->is_entrypoint = true;
7038 }
7039
7040 if (MESA_SPIRV_DEBUG(VALUES)) {
7041 vtn_dump_values(b, stdout);
7042 }
7043
7044 /* structurize the CFG */
7045 nir_lower_goto_ifs(b->shader);
7046
7047 nir_validate_shader(b->shader, "after spirv cfg");
7048
7049 nir_lower_continue_constructs(b->shader);
7050
7051 /* A SPIR-V module can have multiple shader stages and also multiple
7052 * shaders of the same stage. Global variables are declared per-module.
7053 *
7054 * Starting in SPIR-V 1.4 the list of global variables is part of
7055 * OpEntryPoint, so only valid ones will be created. Previous versions
7056 * only have Input and Output variables listed, so remove dead variables to
7057 * clean up the remaining ones.
7058 */
7059 if (!options->create_library && b->version < 0x10400) {
7060 const nir_remove_dead_variables_options dead_opts = {
7061 .can_remove_var = can_remove,
7062 .can_remove_var_data = b->vars_used_indirectly,
7063 };
7064 nir_remove_dead_variables(b->shader, ~(nir_var_function_temp |
7065 nir_var_shader_out |
7066 nir_var_shader_in |
7067 nir_var_system_value),
7068 b->vars_used_indirectly ? &dead_opts : NULL);
7069 }
7070
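/* Gather per-mode resource counts and the push constant block size into
 * shader_info.
 */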
7071 nir_foreach_variable_in_shader(var, b->shader) {
7072 switch (var->data.mode) {
7073 case nir_var_mem_ubo:
7074 b->shader->info.num_ubos++;
7075 break;
7076 case nir_var_mem_ssbo:
7077 b->shader->info.num_ssbos++;
7078 break;
7079 case nir_var_mem_push_const:
7080 vtn_assert(b->shader->num_uniforms == 0);
7081 b->shader->num_uniforms =
7082 glsl_get_explicit_size(glsl_without_array(var->type), false);
7083 break;
7084 }
7085 }
7086
7087 /* We sometimes generate bogus derefs that, while never used, give the
7088 * validator a bit of heartburn. Run dead code to get rid of them.
7089 */
7090 nir_opt_dce(b->shader);
7091
7092 /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
7093 * a Block, all of them will be, and Blocks are explicitly laid out.
7094 */
7095 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
7096 if (glsl_type_is_interface(var->type)) {
7097 assert(b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR);
7098 b->shader->info.shared_memory_explicit_layout = true;
7099 break;
7100 }
7101 }
7102 if (b->shader->info.shared_memory_explicit_layout) {
7103 unsigned size = 0;
7104 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
7105 assert(glsl_type_is_interface(var->type));
7106 const bool align_to_stride = false;
7107 size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
7108 }
7109 b->shader->info.shared_size = size;
7110 }
7111
7112 if (stage == MESA_SHADER_FRAGMENT) {
7113 /* From the Vulkan 1.2.199 spec:
7114 *
7115 * "If a fragment shader entry point’s interface includes an input
7116 * variable decorated with SamplePosition, Sample Shading is
7117 * considered enabled with a minSampleShading value of 1.0."
7118 *
7119 * Similar text exists for SampleId. Regarding the Sample decoration,
7120 * the Vulkan 1.2.199 spec says:
7121 *
7122 * "If a fragment shader input is decorated with Sample, a separate
7123 * value must be assigned to that variable for each covered sample in
7124 * the fragment, and that value must be sampled at the location of
7125 * the individual sample. When rasterizationSamples is
7126 * VK_SAMPLE_COUNT_1_BIT, the fragment center must be used for
7127 * Centroid, Sample, and undecorated attribute interpolation."
7128 *
7129 * Unfortunately, this isn't quite as clear about static use and the
7130 * interface but the static use check should be valid.
7131 *
7132 * For OpenGL, similar language exists but it's all more wishy-washy.
7133 * We'll assume the same behavior across APIs.
7134 */
7135 nir_foreach_variable_with_modes(var, b->shader,
7136 nir_var_shader_in |
7137 nir_var_system_value) {
7138 struct nir_variable_data *members =
7139 var->members ? var->members : &var->data;
7140 uint16_t num_members = var->members ? var->num_members : 1;
7141 for (uint16_t i = 0; i < num_members; i++) {
7142 if (members[i].mode == nir_var_system_value &&
7143 (members[i].location == SYSTEM_VALUE_SAMPLE_ID ||
7144 members[i].location == SYSTEM_VALUE_SAMPLE_POS))
7145 b->shader->info.fs.uses_sample_shading = true;
7146
7147 if (members[i].mode == nir_var_shader_in && members[i].sample)
7148 b->shader->info.fs.uses_sample_shading = true;
7149 }
7150 }
7151 }
7152
7153 /* Work around applications that declare shader_call_data variables inside
7154 * ray generation shaders or multiple shader_call_data variables in callable
7155 * shaders.
7156 *
7157 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/5326
7158 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/11585
7159 */
7160 if (gl_shader_stage_is_rt(b->shader->info.stage))
7161 NIR_PASS(_, b->shader, nir_remove_dead_variables, nir_var_shader_call_data,
7162 NULL);
7163
7164 /* Unparent the shader from the vtn_builder before we delete the builder */
7165 ralloc_steal(NULL, b->shader);
7166
7167 nir_shader *shader = b->shader;
7168 ralloc_free(b);
7169
7170 return shader;
7171 }
7172
7173 static void
7174 print_func_param(FILE *fp, nir_function *func, unsigned p)
7175 {
7176 if (func->params[p].name) {
7177 fputs(func->params[p].name, fp);
7178 } else {
7179 fprintf(fp, "arg%u", p);
7180 }
7181 }
7182
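/* Emit a static-inline nir_builder wrapper for one exported SPIR-V library
 * function. The generated C looks up (or creates) the nir_function prototype
 * by name, asserts that the argument sizes match, emits a nir_call, and, for
 * non-void functions, returns the value loaded back from a local return
 * deref. For example, an exported single-argument function would get a
 * wrapper of roughly this shape (names purely illustrative):
 *
 *    static inline nir_def *
 *    foo(nir_builder *b, nir_def *x) { ... nir_call(b, func, ...); ... }
 */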
7183 static bool
7184 func_to_nir_builder(FILE *fp, struct vtn_function *func)
7185 {
7186 nir_function *nir_func = func->nir_func;
7187 struct vtn_type *return_type = func->type->return_type;
7188 bool returns = return_type->base_type != vtn_base_type_void;
7189
7190 if (returns && return_type->base_type != vtn_base_type_scalar &&
7191 return_type->base_type != vtn_base_type_vector) {
7192 fprintf(stderr, "Unsupported return type for %s", nir_func->name);
7193 return false;
7194 }
7195
7196 /* If there is a return type, the first NIR parameter is the return deref,
7197 * so offset by that for logical parameter iteration.
7198 */
7199 unsigned first_param = returns ? 1 : 0;
7200
7201 /* Generate function signature */
7202 fprintf(fp, "static inline %s\n", returns ? "nir_def *": "void");
7203 fprintf(fp, "%s(nir_builder *b", nir_func->name);
7204
7205 for (unsigned i = first_param; i < nir_func->num_params; ++i) {
7206 fprintf(fp, ", nir_def *");
7207 print_func_param(fp, nir_func, i);
7208 }
7209
7210 fprintf(fp, ")\n{\n");
7211
7212 /* Validate inputs. nir_validate will do this too, but the
7213 * errors/backtraces from these asserts should be nicer.
7214 */
7215 for (unsigned i = first_param; i < nir_func->num_params; ++i) {
7216 nir_parameter *param = &nir_func->params[i];
7217
7218 fprintf(fp, " assert(");
7219 print_func_param(fp, nir_func, i);
7220 fprintf(fp, "->bit_size == %u);\n", param->bit_size);
7221
7222 fprintf(fp, " assert(");
7223 print_func_param(fp, nir_func, i);
7224 fprintf(fp, "->num_components == %u);\n", param->num_components);
7225 }
7226
7227 fprintf(fp, "\n");
7228
7229 /* Find the function to call. If not found, create a prototype */
7230 fprintf(fp, " nir_function *func = nir_shader_get_function_for_name(b->shader, \"%s\");\n",
7231 nir_func->name);
7232 fprintf(fp, "\n");
7233 fprintf(fp, " if (!func) {\n");
7234 fprintf(fp, " func = nir_function_create(b->shader, \"%s\");\n",
7235 nir_func->name);
7236 fprintf(fp, " func->num_params = %u;\n", nir_func->num_params);
7237 fprintf(fp, " func->params = rzalloc_array(b->shader, nir_parameter, func->num_params);\n");
7238
7239 for (unsigned i = 0; i < nir_func->num_params; ++i) {
7240 nir_parameter param = nir_func->params[i];
7241
7242 fprintf(fp, "\n");
7243 fprintf(fp, " func->params[%u].bit_size = %u;\n", i, param.bit_size);
7244 fprintf(fp, " func->params[%u].num_components = %u;\n", i,
7245 param.num_components);
7246
7247 if (returns && i == 0) {
7248 fprintf(fp, " func->params[%u].is_return = true;\n", i);
7249 }
7250
7251 if (param.name) {
7252 fprintf(fp, " func->params[%u].name = \"%s\";\n", i, param.name);
7253 }
7254 }
7255
7256 fprintf(fp, " }\n\n");
7257
7258
7259 if (returns) {
7260 /* We assume that vec3 variables are lowered to vec4. Mirror that here so
7261 * we don't need to lower vec3 to vec4 again at link-time.
7262 */
7263 assert(glsl_type_is_vector_or_scalar(return_type->type));
7264 unsigned elements = return_type->type->vector_elements;
7265 if (elements == 3)
7266 elements = 4;
7267
7268 /* Reconstruct the return type. */
7269 fprintf(fp, " const struct glsl_type *ret_type = glsl_vector_type(%u, %u);\n",
7270 return_type->type->base_type, elements);
7271
7272 /* With the type, we can make a variable and get a deref to pass in */
7273 fprintf(fp, " nir_variable *ret = nir_local_variable_create(b->impl, ret_type, \"return\");\n");
7274 fprintf(fp, " nir_deref_instr *deref = nir_build_deref_var(b, ret);\n");
7275
7276 /* XXX: This is a hack due to ptr size differing between KERNEL and other
7277 * shader stages. This needs to be fixed in core NIR.
7278 */
7279 fprintf(fp, " deref->def.bit_size = %u;\n", nir_func->params[0].bit_size);
7280 fprintf(fp, "\n");
7281 }
7282
7283 /* Call the function */
7284 fprintf(fp, " nir_call(b, func");
7285
7286 if (returns)
7287 fprintf(fp, ", &deref->def");
7288
7289 for (unsigned i = first_param; i < nir_func->num_params; ++i) {
7290 fprintf(fp, ", ");
7291 print_func_param(fp, nir_func, i);
7292 }
7293
7294 fprintf(fp, ");\n");
7295
7296 /* Load the return value if any, undoing the vec3->vec4 lowering. */
7297 if (returns) {
7298 fprintf(fp, "\n");
7299
7300 if (return_type->type->vector_elements == 3)
7301 fprintf(fp, " return nir_trim_vector(b, nir_load_deref(b, deref), 3);\n");
7302 else
7303 fprintf(fp, " return nir_load_deref(b, deref);\n");
7304 }
7305
7306 fprintf(fp, "}\n\n");
7307 return true;
7308 }
7309
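/* Generate a C header of nir_builder wrappers for every exported,
 * non-entry-point function in a SPIR-V library. Only the preamble, types,
 * constants, and the CFG are parsed here; function bodies are never emitted.
 */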
7310 bool
7311 spirv_library_to_nir_builder(FILE *fp, const uint32_t *words, size_t word_count,
7312 const struct spirv_to_nir_options *options)
7313 {
7314 #ifndef NDEBUG
7315 mesa_spirv_debug_init();
7316 #endif
7317
7318 const uint32_t *word_end = words + word_count;
7319
7320 struct vtn_builder *b = vtn_create_builder(words, word_count,
7321 MESA_SHADER_KERNEL, "placeholder name",
7322 options);
7323
7324 if (b == NULL)
7325 return false;
7326
7327 /* See also _vtn_fail() */
7328 if (vtn_setjmp(b->fail_jump)) {
7329 ralloc_free(b);
7330 return false;
7331 }
7332
7333 b->shader = nir_shader_create(b, MESA_SHADER_KERNEL,
7334 &(const nir_shader_compiler_options){0}, NULL);
7335
7336 /* Skip the SPIR-V header, handled at vtn_create_builder */
7337 words += 5;
7338
7339 /* Handle all the preamble instructions */
7340 words = vtn_foreach_instruction(b, words, word_end,
7341 vtn_handle_preamble_instruction);
7342
7343 /* Handle all variable, type, and constant instructions */
7344 words = vtn_foreach_instruction(b, words, word_end,
7345 vtn_handle_variable_or_type_instruction);
7346
7347 /* Set types on all vtn_values */
7348 vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);
7349
7350 vtn_build_cfg(b, words, word_end);
7351
7352 fprintf(fp, "#include \"compiler/nir/nir_builder.h\"\n\n");
7353
7354 nir_fixup_is_exported(b->shader);
7355
7356 vtn_foreach_function(func, &b->functions) {
7357 if (!func->nir_func->is_exported || func->nir_func->is_entrypoint)
7358 continue;
7359
7360 if (!func_to_nir_builder(fp, func))
7361 return false;
7362 }
7363
7364 ralloc_free(b);
7365 return true;
7366 }
7367
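/* Linear scan for the SPIR-V ID of a type, used only for debug printing.
 * Returns 0 (never a valid result ID) if the type has no ID, e.g. because it
 * was created internally.
 */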
7368 static unsigned
7369 vtn_id_for_type(struct vtn_builder *b, struct vtn_type *type)
7370 {
7371 for (unsigned i = 0; i < b->value_id_bound; i++) {
7372 struct vtn_value *v = &b->values[i];
7373 if (v->value_type == vtn_value_type_type &&
7374 v->type == type)
7375 return i;
7376 }
7377
7378 return 0;
7379 }
7380
7381 void
7382 vtn_print_value(struct vtn_builder *b, struct vtn_value *val, FILE *f)
7383 {
7384 fprintf(f, "%s", vtn_value_type_to_string(val->value_type));
7385 switch (val->value_type) {
7386 case vtn_value_type_ssa: {
7387 struct vtn_ssa_value *ssa = val->ssa;
7388 fprintf(f, " glsl_type=%s", glsl_get_type_name(ssa->type));
7389 break;
7390 }
7391
7392 case vtn_value_type_constant: {
7393 fprintf(f, " type=%d", vtn_id_for_type(b, val->type));
7394 if (val->is_null_constant)
7395 fprintf(f, " null");
7396 else if (val->is_undef_constant)
7397 fprintf(f, " undef");
7398 break;
7399 }
7400
7401 case vtn_value_type_pointer: {
7402 struct vtn_pointer *pointer = val->pointer;
7403 fprintf(f, " ptr_type=%u", vtn_id_for_type(b, pointer->type));
7404 fprintf(f, " (pointed-)type=%u", vtn_id_for_type(b, val->pointer->type->pointed));
7405
7406 if (pointer->deref) {
7407 fprintf(f, "\n NIR: ");
7408 nir_print_instr(&pointer->deref->instr, f);
7409 }
7410 break;
7411 }
7412
7413 case vtn_value_type_type: {
7414 struct vtn_type *type = val->type;
7415 fprintf(f, " %s", vtn_base_type_to_string(type->base_type));
7416 switch (type->base_type) {
7417 case vtn_base_type_pointer:
7418 fprintf(f, " deref=%d", vtn_id_for_type(b, type->pointed));
7419 fprintf(f, " %s", spirv_storageclass_to_string(val->type->storage_class));
7420 break;
7421 default:
7422 break;
7423 }
7424 if (type->type)
7425 fprintf(f, " glsl_type=%s", glsl_get_type_name(type->type));
7426 break;
7427 }
7428
7429 default:
7430 break;
7431 }
7432 fprintf(f, "\n");
7433 }
7434
7435 void
7436 vtn_dump_values(struct vtn_builder *b, FILE *f)
7437 {
7438 fprintf(f, "=== SPIR-V values\n");
7439 for (unsigned i = 1; i < b->value_id_bound; i++) {
7440 struct vtn_value *val = &b->values[i];
7441 fprintf(f, "%8d = ", i);
7442 vtn_print_value(b, val, f);
7443 }
7444 fprintf(f, "===\n");
7445 }
7446