1 /*
2 * Copyright 2015-2021 Arm Limited
3 * SPDX-License-Identifier: Apache-2.0 OR MIT
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * At your option, you may choose to accept this material under either:
20 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
21 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
22 */
23
24 #include "spirv_glsl.hpp"
25 #include "GLSL.std.450.h"
26 #include "spirv_common.hpp"
27 #include <algorithm>
28 #include <assert.h>
29 #include <cmath>
30 #include <limits>
31 #include <locale.h>
32 #include <utility>
33
34 #ifndef _WIN32
35 #include <langinfo.h>
36 #endif
37 #include <locale.h>
38
39 using namespace spv;
40 using namespace SPIRV_CROSS_NAMESPACE;
41 using namespace std;
42
43 enum ExtraSubExpressionType
44 {
45 // Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
46 EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
47 EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
48 };
49
static bool is_unsigned_opcode(Op op)
51 {
52 // Don't have to be exhaustive, only relevant for legacy target checking ...
53 switch (op)
54 {
55 case OpShiftRightLogical:
56 case OpUGreaterThan:
57 case OpUGreaterThanEqual:
58 case OpULessThan:
59 case OpULessThanEqual:
60 case OpUConvert:
61 case OpUDiv:
62 case OpUMod:
63 case OpUMulExtended:
64 case OpConvertUToF:
65 case OpConvertFToU:
66 return true;
67
68 default:
69 return false;
70 }
71 }
72
static bool is_unsigned_glsl_opcode(GLSLstd450 op)
74 {
75 // Don't have to be exhaustive, only relevant for legacy target checking ...
76 switch (op)
77 {
78 case GLSLstd450UClamp:
79 case GLSLstd450UMin:
80 case GLSLstd450UMax:
81 case GLSLstd450FindUMsb:
82 return true;
83
84 default:
85 return false;
86 }
87 }
88
static bool packing_is_vec4_padded(BufferPackingStandard packing)
90 {
91 switch (packing)
92 {
93 case BufferPackingHLSLCbuffer:
94 case BufferPackingHLSLCbufferPackOffset:
95 case BufferPackingStd140:
96 case BufferPackingStd140EnhancedLayout:
97 return true;
98
99 default:
100 return false;
101 }
102 }
103
static bool packing_is_hlsl(BufferPackingStandard packing)
105 {
106 switch (packing)
107 {
108 case BufferPackingHLSLCbuffer:
109 case BufferPackingHLSLCbufferPackOffset:
110 return true;
111
112 default:
113 return false;
114 }
115 }
116
static bool packing_has_flexible_offset(BufferPackingStandard packing)
118 {
119 switch (packing)
120 {
121 case BufferPackingStd140:
122 case BufferPackingStd430:
123 case BufferPackingScalar:
124 case BufferPackingHLSLCbuffer:
125 return false;
126
127 default:
128 return true;
129 }
130 }
131
static bool packing_is_scalar(BufferPackingStandard packing)
133 {
134 switch (packing)
135 {
136 case BufferPackingScalar:
137 case BufferPackingScalarEnhancedLayout:
138 return true;
139
140 default:
141 return false;
142 }
143 }
144
static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
146 {
147 switch (packing)
148 {
149 case BufferPackingStd140EnhancedLayout:
150 return BufferPackingStd140;
151 case BufferPackingStd430EnhancedLayout:
152 return BufferPackingStd430;
153 case BufferPackingHLSLCbufferPackOffset:
154 return BufferPackingHLSLCbuffer;
155 case BufferPackingScalarEnhancedLayout:
156 return BufferPackingScalar;
157 default:
158 return packing;
159 }
160 }
161
void CompilerGLSL::init()
163 {
164 if (ir.source.known)
165 {
166 options.es = ir.source.es;
167 options.version = ir.source.version;
168 }
169
// Query the locale to see what the decimal point is.
// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
// tricky.
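// For example, in a locale such as de_DE, localeconv()->decimal_point is ",", and float-to-string
// conversion would otherwise produce literals like "1,5", which are not valid GLSL.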
174 #ifdef _WIN32
175 // On Windows, localeconv uses thread-local storage, so it should be fine.
176 const struct lconv *conv = localeconv();
177 if (conv && conv->decimal_point)
178 current_locale_radix_character = *conv->decimal_point;
179 #elif defined(__ANDROID__) && __ANDROID_API__ < 26
180 // nl_langinfo is not supported on this platform, fall back to the worse alternative.
181 const struct lconv *conv = localeconv();
182 if (conv && conv->decimal_point)
183 current_locale_radix_character = *conv->decimal_point;
184 #else
185 // localeconv, the portable function is not MT safe ...
186 const char *decimal_point = nl_langinfo(RADIXCHAR);
187 if (decimal_point && *decimal_point != '\0')
188 current_locale_radix_character = *decimal_point;
189 #endif
190 }
191
static const char *to_pls_layout(PlsFormat format)
193 {
194 switch (format)
195 {
196 case PlsR11FG11FB10F:
197 return "layout(r11f_g11f_b10f) ";
198 case PlsR32F:
199 return "layout(r32f) ";
200 case PlsRG16F:
201 return "layout(rg16f) ";
202 case PlsRGB10A2:
203 return "layout(rgb10_a2) ";
204 case PlsRGBA8:
205 return "layout(rgba8) ";
206 case PlsRG16:
207 return "layout(rg16) ";
208 case PlsRGBA8I:
return "layout(rgba8i) ";
210 case PlsRG16I:
211 return "layout(rg16i) ";
212 case PlsRGB10A2UI:
213 return "layout(rgb10_a2ui) ";
214 case PlsRGBA8UI:
215 return "layout(rgba8ui) ";
216 case PlsRG16UI:
217 return "layout(rg16ui) ";
218 case PlsR32UI:
219 return "layout(r32ui) ";
220 default:
221 return "";
222 }
223 }
224
static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
226 {
227 switch (format)
228 {
229 default:
230 case PlsR11FG11FB10F:
231 case PlsR32F:
232 case PlsRG16F:
233 case PlsRGB10A2:
234 case PlsRGBA8:
235 case PlsRG16:
236 return SPIRType::Float;
237
238 case PlsRGBA8I:
239 case PlsRG16I:
240 return SPIRType::Int;
241
242 case PlsRGB10A2UI:
243 case PlsRGBA8UI:
244 case PlsRG16UI:
245 case PlsR32UI:
246 return SPIRType::UInt;
247 }
248 }
249
static uint32_t pls_format_to_components(PlsFormat format)
251 {
252 switch (format)
253 {
254 default:
255 case PlsR32F:
256 case PlsR32UI:
257 return 1;
258
259 case PlsRG16F:
260 case PlsRG16:
261 case PlsRG16UI:
262 case PlsRG16I:
263 return 2;
264
265 case PlsR11FG11FB10F:
266 return 3;
267
268 case PlsRGB10A2:
269 case PlsRGBA8:
270 case PlsRGBA8I:
271 case PlsRGB10A2UI:
272 case PlsRGBA8UI:
273 return 4;
274 }
275 }
276
const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
278 {
279 static const char *const swizzle[4][4] = {
280 { ".x", ".y", ".z", ".w" },
281 { ".xy", ".yz", ".zw", nullptr },
282 { ".xyz", ".yzw", nullptr, nullptr },
283 #if defined(__GNUC__) && (__GNUC__ == 9)
284 // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
285 // This array ends up being compiled as all nullptrs, tripping the assertions below.
286 { "", nullptr, nullptr, "$" },
287 #else
288 { "", nullptr, nullptr, nullptr },
289 #endif
290 };
291
292 assert(vecsize >= 1 && vecsize <= 4);
293 assert(index >= 0 && index < 4);
294 assert(swizzle[vecsize - 1][index]);
295
296 return swizzle[vecsize - 1][index];
297 }
298
void CompilerGLSL::reset()
300 {
301 // We do some speculative optimizations which should pretty much always work out,
302 // but just in case the SPIR-V is rather weird, recompile until it's happy.
303 // This typically only means one extra pass.
304 clear_force_recompile();
305
306 // Clear invalid expression tracking.
307 invalid_expressions.clear();
308 current_function = nullptr;
309
310 // Clear temporary usage tracking.
311 expression_usage_counts.clear();
312 forwarded_temporaries.clear();
313 suppressed_usage_tracking.clear();
314
315 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
316 flushed_phi_variables.clear();
317
318 reset_name_caches();
319
320 ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
321 func.active = false;
322 func.flush_undeclared = true;
323 });
324
325 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });
326
327 ir.reset_all_of_type<SPIRExpression>();
328 ir.reset_all_of_type<SPIRAccessChain>();
329
330 statement_count = 0;
331 indent = 0;
332 current_loop_level = 0;
333 }
334
void CompilerGLSL::remap_pls_variables()
336 {
337 for (auto &input : pls_inputs)
338 {
339 auto &var = get<SPIRVariable>(input.id);
340
341 bool input_is_target = false;
342 if (var.storage == StorageClassUniformConstant)
343 {
344 auto &type = get<SPIRType>(var.basetype);
345 input_is_target = type.image.dim == DimSubpassData;
346 }
347
348 if (var.storage != StorageClassInput && !input_is_target)
349 SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
350 var.remapped_variable = true;
351 }
352
353 for (auto &output : pls_outputs)
354 {
355 auto &var = get<SPIRVariable>(output.id);
356 if (var.storage != StorageClassOutput)
357 SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
358 var.remapped_variable = true;
359 }
360 }
361
void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
363 {
364 subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
365 inout_color_attachments.push_back({ color_location, coherent });
366 }
367
bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
369 {
370 return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
371 [&](const std::pair<uint32_t, bool> &elem) {
372 return elem.first == location;
373 }) != end(inout_color_attachments);
374 }
375
bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
377 {
378 return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
379 [&](const std::pair<uint32_t, bool> &elem) {
380 return elem.first == location && !elem.second;
381 }) != end(inout_color_attachments);
382 }
383
void CompilerGLSL::find_static_extensions()
385 {
386 ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
387 if (type.basetype == SPIRType::Double)
388 {
389 if (options.es)
390 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
391 if (!options.es && options.version < 400)
392 require_extension_internal("GL_ARB_gpu_shader_fp64");
393 }
394 else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
395 {
396 if (options.es)
397 SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
398 if (!options.es)
399 require_extension_internal("GL_ARB_gpu_shader_int64");
400 }
401 else if (type.basetype == SPIRType::Half)
402 {
403 require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
404 if (options.vulkan_semantics)
405 require_extension_internal("GL_EXT_shader_16bit_storage");
406 }
407 else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
408 {
409 require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
410 if (options.vulkan_semantics)
411 require_extension_internal("GL_EXT_shader_8bit_storage");
412 }
413 else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
414 {
415 require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
416 if (options.vulkan_semantics)
417 require_extension_internal("GL_EXT_shader_16bit_storage");
418 }
419 });
420
421 auto &execution = get_entry_point();
422 switch (execution.model)
423 {
424 case ExecutionModelGLCompute:
425 if (!options.es && options.version < 430)
426 require_extension_internal("GL_ARB_compute_shader");
427 if (options.es && options.version < 310)
428 SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
429 break;
430
431 case ExecutionModelGeometry:
432 if (options.es && options.version < 320)
433 require_extension_internal("GL_EXT_geometry_shader");
434 if (!options.es && options.version < 150)
435 require_extension_internal("GL_ARB_geometry_shader4");
436
437 if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
438 {
439 // Instanced GS is part of 400 core or this extension.
440 if (!options.es && options.version < 400)
441 require_extension_internal("GL_ARB_gpu_shader5");
442 }
443 break;
444
445 case ExecutionModelTessellationEvaluation:
446 case ExecutionModelTessellationControl:
447 if (options.es && options.version < 320)
448 require_extension_internal("GL_EXT_tessellation_shader");
449 if (!options.es && options.version < 400)
450 require_extension_internal("GL_ARB_tessellation_shader");
451 break;
452
453 case ExecutionModelRayGenerationKHR:
454 case ExecutionModelIntersectionKHR:
455 case ExecutionModelAnyHitKHR:
456 case ExecutionModelClosestHitKHR:
457 case ExecutionModelMissKHR:
458 case ExecutionModelCallableKHR:
459 // NV enums are aliases.
460 if (options.es || options.version < 460)
461 SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
462 if (!options.vulkan_semantics)
463 SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");
464
465 // Need to figure out if we should target KHR or NV extension based on capabilities.
466 for (auto &cap : ir.declared_capabilities)
467 {
468 if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
469 cap == CapabilityRayTraversalPrimitiveCullingKHR)
470 {
471 ray_tracing_is_khr = true;
472 break;
473 }
474 }
475
476 if (ray_tracing_is_khr)
477 {
478 // In KHR ray tracing we pass payloads by pointer instead of location,
479 // so make sure we assign locations properly.
480 ray_tracing_khr_fixup_locations();
481 require_extension_internal("GL_EXT_ray_tracing");
482 }
483 else
484 require_extension_internal("GL_NV_ray_tracing");
485 break;
486
487 default:
488 break;
489 }
490
491 if (!pls_inputs.empty() || !pls_outputs.empty())
492 {
493 if (execution.model != ExecutionModelFragment)
494 SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
495 require_extension_internal("GL_EXT_shader_pixel_local_storage");
496 }
497
498 if (!inout_color_attachments.empty())
499 {
500 if (execution.model != ExecutionModelFragment)
501 SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
502 if (options.vulkan_semantics)
503 SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");
504
505 bool has_coherent = false;
506 bool has_incoherent = false;
507
508 for (auto &att : inout_color_attachments)
509 {
510 if (att.second)
511 has_coherent = true;
512 else
513 has_incoherent = true;
514 }
515
516 if (has_coherent)
517 require_extension_internal("GL_EXT_shader_framebuffer_fetch");
518 if (has_incoherent)
519 require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent");
520 }
521
522 if (options.separate_shader_objects && !options.es && options.version < 410)
523 require_extension_internal("GL_ARB_separate_shader_objects");
524
525 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
526 {
527 if (!options.vulkan_semantics)
528 SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
529 if (options.es && options.version < 320)
530 SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
531 else if (!options.es && options.version < 450)
532 SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
533 require_extension_internal("GL_EXT_buffer_reference");
534 }
535 else if (ir.addressing_model != AddressingModelLogical)
536 {
537 SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
538 }
539
540 // Check for nonuniform qualifier and passthrough.
541 // Instead of looping over all decorations to find this, just look at capabilities.
542 for (auto &cap : ir.declared_capabilities)
543 {
544 switch (cap)
545 {
546 case CapabilityShaderNonUniformEXT:
547 if (!options.vulkan_semantics)
548 require_extension_internal("GL_NV_gpu_shader5");
549 else
550 require_extension_internal("GL_EXT_nonuniform_qualifier");
551 break;
552 case CapabilityRuntimeDescriptorArrayEXT:
553 if (!options.vulkan_semantics)
554 SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
555 require_extension_internal("GL_EXT_nonuniform_qualifier");
556 break;
557
558 case CapabilityGeometryShaderPassthroughNV:
559 if (execution.model == ExecutionModelGeometry)
560 {
561 require_extension_internal("GL_NV_geometry_shader_passthrough");
562 execution.geometry_passthrough = true;
563 }
564 break;
565
566 case CapabilityVariablePointers:
567 case CapabilityVariablePointersStorageBuffer:
568 SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");
569
570 case CapabilityMultiView:
571 if (options.vulkan_semantics)
572 require_extension_internal("GL_EXT_multiview");
573 else
574 {
575 require_extension_internal("GL_OVR_multiview2");
576 if (options.ovr_multiview_view_count == 0)
577 SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
578 if (get_execution_model() != ExecutionModelVertex)
579 SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
580 }
581 break;
582
583 case CapabilityRayQueryKHR:
584 if (options.es || options.version < 460 || !options.vulkan_semantics)
585 SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
586 require_extension_internal("GL_EXT_ray_query");
587 ray_tracing_is_khr = true;
588 break;
589
590 case CapabilityRayTraversalPrimitiveCullingKHR:
591 if (options.es || options.version < 460 || !options.vulkan_semantics)
592 SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
593 require_extension_internal("GL_EXT_ray_flags_primitive_culling");
594 ray_tracing_is_khr = true;
595 break;
596
597 default:
598 break;
599 }
600 }
601
602 if (options.ovr_multiview_view_count)
603 {
604 if (options.vulkan_semantics)
605 SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
606 if (get_execution_model() != ExecutionModelVertex)
607 SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
608 require_extension_internal("GL_OVR_multiview2");
609 }
610 }
611
void CompilerGLSL::ray_tracing_khr_fixup_locations()
613 {
614 uint32_t location = 0;
615 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
616 // Incoming payload storage can also be used for tracing.
617 if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
618 var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
619 return;
620 if (is_hidden_variable(var))
621 return;
622 set_decoration(var.self, DecorationLocation, location++);
623 });
624 }
625
string CompilerGLSL::compile()
627 {
628 ir.fixup_reserved_names();
629
630 if (!options.vulkan_semantics)
631 {
632 // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
633 backend.nonuniform_qualifier = "";
634 backend.needs_row_major_load_workaround = true;
635 }
636 backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
637 backend.force_gl_in_out_block = true;
638 backend.supports_extensions = true;
639 backend.use_array_constructor = true;
640
641 backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320);
642
643 if (is_legacy_es())
644 backend.support_case_fallthrough = false;
645
646 // Scan the SPIR-V to find trivial uses of extensions.
647 fixup_type_alias();
648 reorder_type_alias();
649 build_function_control_flow_graphs_and_analyze();
650 find_static_extensions();
651 fixup_image_load_store_access();
652 update_active_builtins();
653 analyze_image_and_sampler_usage();
654 analyze_interlocked_resource_usage();
655 if (!inout_color_attachments.empty())
656 emit_inout_fragment_outputs_copy_to_subpass_inputs();
657
658 // Shaders might cast unrelated data to pointers of non-block types.
659 // Find all such instances and make sure we can cast the pointers to a synthesized block type.
660 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
661 analyze_non_block_pointer_types();
662
663 uint32_t pass_count = 0;
664 do
665 {
666 if (pass_count >= 3)
667 SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");
668
669 reset();
670
671 buffer.reset();
672
673 emit_header();
674 emit_resources();
675 emit_extension_workarounds(get_execution_model());
676
677 emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
678
679 pass_count++;
680 } while (is_forcing_recompilation());
681
682 // Implement the interlocked wrapper function at the end.
683 // The body was implemented in lieu of main().
684 if (interlocked_is_complex)
685 {
686 statement("void main()");
687 begin_scope();
688 statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
689 if (options.es)
690 statement("beginInvocationInterlockNV();");
691 else
692 statement("beginInvocationInterlockARB();");
693 statement("spvMainInterlockedBody();");
694 if (options.es)
695 statement("endInvocationInterlockNV();");
696 else
697 statement("endInvocationInterlockARB();");
698 end_scope();
699 }
700
701 // Entry point in GLSL is always main().
702 get_entry_point().name = "main";
703
704 return buffer.str();
705 }
706
std::string CompilerGLSL::get_partial_source()
708 {
709 return buffer.str();
710 }
711
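// Collects the local_size_* layout qualifier arguments which the caller merges into a declaration
// such as "layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;".
// With Vulkan semantics, specialization constants are emitted as local_size_*_id qualifiers instead.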
void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
714 {
715 auto &execution = get_entry_point();
716
717 if (wg_x.id)
718 {
719 if (options.vulkan_semantics)
720 arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
721 else
722 arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
723 }
724 else
725 arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));
726
727 if (wg_y.id)
728 {
729 if (options.vulkan_semantics)
730 arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
731 else
732 arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
733 }
734 else
735 arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));
736
737 if (wg_z.id)
738 {
739 if (options.vulkan_semantics)
740 arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
741 else
742 arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
743 }
744 else
745 arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
746 }
747
void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
749 {
750 if (options.vulkan_semantics)
751 {
752 auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
753 require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
754 }
755 else
756 {
757 if (!shader_subgroup_supporter.is_feature_requested(feature))
758 force_recompile();
759 shader_subgroup_supporter.request_feature(feature);
760 }
761 }
762
void CompilerGLSL::emit_header()
764 {
765 auto &execution = get_entry_point();
766 statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");
767
768 if (!options.es && options.version < 420)
769 {
770 // Needed for binding = # on UBOs, etc.
771 if (options.enable_420pack_extension)
772 {
773 statement("#ifdef GL_ARB_shading_language_420pack");
774 statement("#extension GL_ARB_shading_language_420pack : require");
775 statement("#endif");
776 }
777 // Needed for: layout(early_fragment_tests) in;
778 if (execution.flags.get(ExecutionModeEarlyFragmentTests))
779 require_extension_internal("GL_ARB_shader_image_load_store");
780 }
781
782 // Needed for: layout(post_depth_coverage) in;
783 if (execution.flags.get(ExecutionModePostDepthCoverage))
784 require_extension_internal("GL_ARB_post_depth_coverage");
785
786 // Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
787 if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
788 execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
789 execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
790 execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
791 {
792 if (options.es)
793 {
794 if (options.version < 310)
795 SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
796 require_extension_internal("GL_NV_fragment_shader_interlock");
797 }
798 else
799 {
800 if (options.version < 420)
801 require_extension_internal("GL_ARB_shader_image_load_store");
802 require_extension_internal("GL_ARB_fragment_shader_interlock");
803 }
804 }
805
806 for (auto &ext : forced_extensions)
807 {
808 if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
809 {
810 // Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
811 // GL_AMD_gpu_shader_half_float is a superset, so try that first.
812 statement("#if defined(GL_AMD_gpu_shader_half_float)");
813 statement("#extension GL_AMD_gpu_shader_half_float : require");
814 if (!options.vulkan_semantics)
815 {
816 statement("#elif defined(GL_NV_gpu_shader5)");
817 statement("#extension GL_NV_gpu_shader5 : require");
818 }
819 else
820 {
821 statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
822 statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
823 }
824 statement("#else");
825 statement("#error No extension available for FP16.");
826 statement("#endif");
827 }
828 else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
829 {
830 if (options.vulkan_semantics)
831 statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
832 else
833 {
834 statement("#if defined(GL_AMD_gpu_shader_int16)");
835 statement("#extension GL_AMD_gpu_shader_int16 : require");
836 statement("#elif defined(GL_NV_gpu_shader5)");
837 statement("#extension GL_NV_gpu_shader5 : require");
838 statement("#else");
839 statement("#error No extension available for Int16.");
840 statement("#endif");
841 }
842 }
843 else if (ext == "GL_ARB_post_depth_coverage")
844 {
845 if (options.es)
846 statement("#extension GL_EXT_post_depth_coverage : require");
847 else
848 {
statement("#if defined(GL_ARB_post_depth_coverage)");
850 statement("#extension GL_ARB_post_depth_coverage : require");
851 statement("#else");
852 statement("#extension GL_EXT_post_depth_coverage : require");
853 statement("#endif");
854 }
855 }
856 else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
857 {
858 // Soft-enable this extension on plain GLSL.
859 statement("#ifdef ", ext);
860 statement("#extension ", ext, " : enable");
861 statement("#endif");
862 }
863 else if (ext == "GL_EXT_control_flow_attributes")
864 {
// These are just hints, so we can conditionally enable them and fall back in the shader.
866 statement("#if defined(GL_EXT_control_flow_attributes)");
867 statement("#extension GL_EXT_control_flow_attributes : require");
868 statement("#define SPIRV_CROSS_FLATTEN [[flatten]]");
869 statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
870 statement("#define SPIRV_CROSS_UNROLL [[unroll]]");
871 statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]");
872 statement("#else");
873 statement("#define SPIRV_CROSS_FLATTEN");
874 statement("#define SPIRV_CROSS_BRANCH");
875 statement("#define SPIRV_CROSS_UNROLL");
876 statement("#define SPIRV_CROSS_LOOP");
877 statement("#endif");
878 }
879 else
880 statement("#extension ", ext, " : require");
881 }
882
883 if (!options.vulkan_semantics)
884 {
885 using Supp = ShaderSubgroupSupportHelper;
886 auto result = shader_subgroup_supporter.resolve();
887
888 for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
889 {
890 auto feature = static_cast<Supp::Feature>(feature_index);
891 if (!shader_subgroup_supporter.is_feature_requested(feature))
892 continue;
893
894 auto exts = Supp::get_candidates_for_feature(feature, result);
895 if (exts.empty())
896 continue;
897
898 statement("");
899
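// Emit a preprocessor chain of the form
//   #if defined(EXT_A) ... #elif defined(EXT_B) ... [#else #error ...] #endif
// so the first extension supported by the driver is the one enabled.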
900 for (auto &ext : exts)
901 {
902 const char *name = Supp::get_extension_name(ext);
903 const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
904 auto extra_names = Supp::get_extra_required_extension_names(ext);
905 statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
906 (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
907 for (const auto &e : extra_names)
908 statement("#extension ", e, " : enable");
909 statement("#extension ", name, " : require");
910 }
911
912 if (!Supp::can_feature_be_implemented_without_extensions(feature))
913 {
914 statement("#else");
915 statement("#error No extensions available to emulate requested subgroup feature.");
916 }
917
918 statement("#endif");
919 }
920 }
921
922 for (auto &header : header_lines)
923 statement(header);
924
925 SmallVector<string> inputs;
926 SmallVector<string> outputs;
927
928 switch (execution.model)
929 {
930 case ExecutionModelVertex:
931 if (options.ovr_multiview_view_count)
932 inputs.push_back(join("num_views = ", options.ovr_multiview_view_count));
933 break;
934 case ExecutionModelGeometry:
935 if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
936 inputs.push_back(join("invocations = ", execution.invocations));
937 if (execution.flags.get(ExecutionModeInputPoints))
938 inputs.push_back("points");
939 if (execution.flags.get(ExecutionModeInputLines))
940 inputs.push_back("lines");
941 if (execution.flags.get(ExecutionModeInputLinesAdjacency))
942 inputs.push_back("lines_adjacency");
943 if (execution.flags.get(ExecutionModeTriangles))
944 inputs.push_back("triangles");
945 if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
946 inputs.push_back("triangles_adjacency");
947
948 if (!execution.geometry_passthrough)
949 {
// For passthrough, these are implied and cannot be declared in the shader.
951 outputs.push_back(join("max_vertices = ", execution.output_vertices));
952 if (execution.flags.get(ExecutionModeOutputTriangleStrip))
953 outputs.push_back("triangle_strip");
954 if (execution.flags.get(ExecutionModeOutputPoints))
955 outputs.push_back("points");
956 if (execution.flags.get(ExecutionModeOutputLineStrip))
957 outputs.push_back("line_strip");
958 }
959 break;
960
961 case ExecutionModelTessellationControl:
962 if (execution.flags.get(ExecutionModeOutputVertices))
963 outputs.push_back(join("vertices = ", execution.output_vertices));
964 break;
965
966 case ExecutionModelTessellationEvaluation:
967 if (execution.flags.get(ExecutionModeQuads))
968 inputs.push_back("quads");
969 if (execution.flags.get(ExecutionModeTriangles))
970 inputs.push_back("triangles");
971 if (execution.flags.get(ExecutionModeIsolines))
972 inputs.push_back("isolines");
973 if (execution.flags.get(ExecutionModePointMode))
974 inputs.push_back("point_mode");
975
976 if (!execution.flags.get(ExecutionModeIsolines))
977 {
978 if (execution.flags.get(ExecutionModeVertexOrderCw))
979 inputs.push_back("cw");
980 if (execution.flags.get(ExecutionModeVertexOrderCcw))
981 inputs.push_back("ccw");
982 }
983
984 if (execution.flags.get(ExecutionModeSpacingFractionalEven))
985 inputs.push_back("fractional_even_spacing");
986 if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
987 inputs.push_back("fractional_odd_spacing");
988 if (execution.flags.get(ExecutionModeSpacingEqual))
989 inputs.push_back("equal_spacing");
990 break;
991
992 case ExecutionModelGLCompute:
993 {
994 if (execution.workgroup_size.constant != 0)
995 {
996 SpecializationConstant wg_x, wg_y, wg_z;
997 get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
998
999 // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
1000 // declarations before we can emit the work group size.
1001 if (options.vulkan_semantics ||
1002 ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
1003 build_workgroup_size(inputs, wg_x, wg_y, wg_z);
1004 }
1005 else
1006 {
1007 inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
1008 inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
1009 inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
1010 }
1011 break;
1012 }
1013
1014 case ExecutionModelFragment:
1015 if (options.es)
1016 {
1017 switch (options.fragment.default_float_precision)
1018 {
1019 case Options::Lowp:
1020 statement("precision lowp float;");
1021 break;
1022
1023 case Options::Mediump:
1024 statement("precision mediump float;");
1025 break;
1026
1027 case Options::Highp:
1028 statement("precision highp float;");
1029 break;
1030
1031 default:
1032 break;
1033 }
1034
1035 switch (options.fragment.default_int_precision)
1036 {
1037 case Options::Lowp:
1038 statement("precision lowp int;");
1039 break;
1040
1041 case Options::Mediump:
1042 statement("precision mediump int;");
1043 break;
1044
1045 case Options::Highp:
1046 statement("precision highp int;");
1047 break;
1048
1049 default:
1050 break;
1051 }
1052 }
1053
1054 if (execution.flags.get(ExecutionModeEarlyFragmentTests))
1055 inputs.push_back("early_fragment_tests");
1056 if (execution.flags.get(ExecutionModePostDepthCoverage))
1057 inputs.push_back("post_depth_coverage");
1058
1059 if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
1060 inputs.push_back("pixel_interlock_ordered");
1061 else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
1062 inputs.push_back("pixel_interlock_unordered");
1063 else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
1064 inputs.push_back("sample_interlock_ordered");
1065 else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
1066 inputs.push_back("sample_interlock_unordered");
1067
1068 if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
1069 statement("layout(depth_greater) out float gl_FragDepth;");
1070 else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
1071 statement("layout(depth_less) out float gl_FragDepth;");
1072
1073 break;
1074
1075 default:
1076 break;
1077 }
1078
1079 for (auto &cap : ir.declared_capabilities)
1080 if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
1081 statement("layout(primitive_culling);");
1082
1083 if (!inputs.empty())
1084 statement("layout(", merge(inputs), ") in;");
1085 if (!outputs.empty())
1086 statement("layout(", merge(outputs), ") out;");
1087
1088 statement("");
1089 }
1090
bool CompilerGLSL::type_is_empty(const SPIRType &type)
1092 {
1093 return type.basetype == SPIRType::Struct && type.member_types.empty();
1094 }
1095
void CompilerGLSL::emit_struct(SPIRType &type)
1097 {
1098 // Struct types can be stamped out multiple times
1099 // with just different offsets, matrix layouts, etc ...
1100 // Type-punning with these types is legal, which complicates things
1101 // when we are storing struct and array types in an SSBO for example.
1102 // If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
1103 if (type.type_alias != TypeID(0) &&
1104 !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
1105 return;
1106
1107 add_resource_name(type.self);
1108 auto name = type_to_glsl(type);
1109
1110 statement(!backend.explicit_struct_type ? "struct " : "", name);
1111 begin_scope();
1112
1113 type.member_name_cache.clear();
1114
1115 uint32_t i = 0;
1116 bool emitted = false;
1117 for (auto &member : type.member_types)
1118 {
1119 add_member_name(type, i);
1120 emit_struct_member(type, member, i);
1121 i++;
1122 emitted = true;
1123 }
1124
1125 // Don't declare empty structs in GLSL, this is not allowed.
1126 if (type_is_empty(type) && !backend.supports_empty_struct)
1127 {
1128 statement("int empty_struct_member;");
1129 emitted = true;
1130 }
1131
1132 if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
1133 emit_struct_padding_target(type);
1134
1135 end_scope_decl();
1136
1137 if (emitted)
1138 statement("");
1139 }
1140
string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
1142 {
1143 string res;
1144 //if (flags & (1ull << DecorationSmooth))
1145 // res += "smooth ";
1146 if (flags.get(DecorationFlat))
1147 res += "flat ";
1148 if (flags.get(DecorationNoPerspective))
1149 res += "noperspective ";
1150 if (flags.get(DecorationCentroid))
1151 res += "centroid ";
1152 if (flags.get(DecorationPatch))
1153 res += "patch ";
1154 if (flags.get(DecorationSample))
1155 res += "sample ";
1156 if (flags.get(DecorationInvariant))
1157 res += "invariant ";
1158
1159 if (flags.get(DecorationExplicitInterpAMD))
1160 {
1161 require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
1162 res += "__explicitInterpAMD ";
1163 }
1164
1165 if (flags.get(DecorationPerVertexNV))
1166 {
1167 if (options.es && options.version < 320)
1168 SPIRV_CROSS_THROW("pervertexNV requires ESSL 320.");
1169 else if (!options.es && options.version < 450)
1170 SPIRV_CROSS_THROW("pervertexNV requires GLSL 450.");
1171 require_extension_internal("GL_NV_fragment_shader_barycentric");
1172 res += "pervertexNV ";
1173 }
1174
1175 return res;
1176 }
1177
string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
1179 {
1180 if (is_legacy())
1181 return "";
1182
1183 bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1184 if (!is_block)
1185 return "";
1186
1187 auto &memb = ir.meta[type.self].members;
1188 if (index >= memb.size())
1189 return "";
1190 auto &dec = memb[index];
1191
1192 SmallVector<string> attr;
1193
1194 if (has_member_decoration(type.self, index, DecorationPassthroughNV))
1195 attr.push_back("passthrough");
1196
1197 // We can only apply layouts on members in block interfaces.
1198 // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
1199 // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
1200 // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
1201 //
1202 // We would like to go from (SPIR-V style):
1203 //
1204 // struct Foo { layout(row_major) mat4 matrix; };
1205 // buffer UBO { Foo foo; };
1206 //
1207 // to
1208 //
1209 // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
1210 // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
1211 auto flags = combined_decoration_for_member(type, index);
1212
1213 if (flags.get(DecorationRowMajor))
1214 attr.push_back("row_major");
1215 // We don't emit any global layouts, so column_major is default.
1216 //if (flags & (1ull << DecorationColMajor))
1217 // attr.push_back("column_major");
1218
1219 if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
1220 attr.push_back(join("location = ", dec.location));
1221
1222 // Can only declare component if we can declare location.
1223 if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
1224 {
1225 if (!options.es)
1226 {
1227 if (options.version < 440 && options.version >= 140)
1228 require_extension_internal("GL_ARB_enhanced_layouts");
1229 else if (options.version < 140)
1230 SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
1231 attr.push_back(join("component = ", dec.component));
1232 }
1233 else
1234 SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
1235 }
1236
1237 // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
1238 // This is only done selectively in GLSL as needed.
1239 if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
1240 dec.decoration_flags.get(DecorationOffset))
1241 attr.push_back(join("offset = ", dec.offset));
1242 else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
1243 attr.push_back(join("xfb_offset = ", dec.offset));
1244
1245 if (attr.empty())
1246 return "";
1247
1248 string res = "layout(";
1249 res += merge(attr);
1250 res += ") ";
1251 return res;
1252 }
1253
const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
1255 {
1256 if (options.es && is_desktop_only_format(format))
1257 SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
1258
1259 switch (format)
1260 {
1261 case ImageFormatRgba32f:
1262 return "rgba32f";
1263 case ImageFormatRgba16f:
1264 return "rgba16f";
1265 case ImageFormatR32f:
1266 return "r32f";
1267 case ImageFormatRgba8:
1268 return "rgba8";
1269 case ImageFormatRgba8Snorm:
1270 return "rgba8_snorm";
1271 case ImageFormatRg32f:
1272 return "rg32f";
1273 case ImageFormatRg16f:
1274 return "rg16f";
1275 case ImageFormatRgba32i:
1276 return "rgba32i";
1277 case ImageFormatRgba16i:
1278 return "rgba16i";
1279 case ImageFormatR32i:
1280 return "r32i";
1281 case ImageFormatRgba8i:
1282 return "rgba8i";
1283 case ImageFormatRg32i:
1284 return "rg32i";
1285 case ImageFormatRg16i:
1286 return "rg16i";
1287 case ImageFormatRgba32ui:
1288 return "rgba32ui";
1289 case ImageFormatRgba16ui:
1290 return "rgba16ui";
1291 case ImageFormatR32ui:
1292 return "r32ui";
1293 case ImageFormatRgba8ui:
1294 return "rgba8ui";
1295 case ImageFormatRg32ui:
1296 return "rg32ui";
1297 case ImageFormatRg16ui:
1298 return "rg16ui";
1299 case ImageFormatR11fG11fB10f:
1300 return "r11f_g11f_b10f";
1301 case ImageFormatR16f:
1302 return "r16f";
1303 case ImageFormatRgb10A2:
1304 return "rgb10_a2";
1305 case ImageFormatR8:
1306 return "r8";
1307 case ImageFormatRg8:
1308 return "rg8";
1309 case ImageFormatR16:
1310 return "r16";
1311 case ImageFormatRg16:
1312 return "rg16";
1313 case ImageFormatRgba16:
1314 return "rgba16";
1315 case ImageFormatR16Snorm:
1316 return "r16_snorm";
1317 case ImageFormatRg16Snorm:
1318 return "rg16_snorm";
1319 case ImageFormatRgba16Snorm:
1320 return "rgba16_snorm";
1321 case ImageFormatR8Snorm:
1322 return "r8_snorm";
1323 case ImageFormatRg8Snorm:
1324 return "rg8_snorm";
1325 case ImageFormatR8ui:
1326 return "r8ui";
1327 case ImageFormatRg8ui:
1328 return "rg8ui";
1329 case ImageFormatR16ui:
1330 return "r16ui";
1331 case ImageFormatRgb10a2ui:
1332 return "rgb10_a2ui";
1333 case ImageFormatR8i:
1334 return "r8i";
1335 case ImageFormatRg8i:
1336 return "rg8i";
1337 case ImageFormatR16i:
1338 return "r16i";
1339 default:
1340 case ImageFormatUnknown:
1341 return nullptr;
1342 }
1343 }
1344
uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
1346 {
1347 switch (type.basetype)
1348 {
1349 case SPIRType::Double:
1350 case SPIRType::Int64:
1351 case SPIRType::UInt64:
1352 return 8;
1353 case SPIRType::Float:
1354 case SPIRType::Int:
1355 case SPIRType::UInt:
1356 return 4;
1357 case SPIRType::Half:
1358 case SPIRType::Short:
1359 case SPIRType::UShort:
1360 return 2;
1361 case SPIRType::SByte:
1362 case SPIRType::UByte:
1363 return 1;
1364
1365 default:
1366 SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
1367 }
1368 }
1369
uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
                                                BufferPackingStandard packing)
1372 {
1373 // If using PhysicalStorageBufferEXT storage class, this is a pointer,
1374 // and is 64-bit.
1375 if (type.storage == StorageClassPhysicalStorageBufferEXT)
1376 {
1377 if (!type.pointer)
1378 SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1379
1380 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1381 {
1382 if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
1383 return 16;
1384 else
1385 return 8;
1386 }
1387 else
1388 SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1389 }
1390
1391 if (!type.array.empty())
1392 {
1393 uint32_t minimum_alignment = 1;
1394 if (packing_is_vec4_padded(packing))
1395 minimum_alignment = 16;
1396
1397 auto *tmp = &get<SPIRType>(type.parent_type);
1398 while (!tmp->array.empty())
1399 tmp = &get<SPIRType>(tmp->parent_type);
1400
1401 // Get the alignment of the base type, then maybe round up.
1402 return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
1403 }
1404
1405 if (type.basetype == SPIRType::Struct)
1406 {
1407 // Rule 9. Structs alignments are maximum alignment of its members.
1408 uint32_t alignment = 1;
1409 for (uint32_t i = 0; i < type.member_types.size(); i++)
1410 {
1411 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1412 alignment =
1413 max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
1414 }
1415
1416 // In std140, struct alignment is rounded up to 16.
1417 if (packing_is_vec4_padded(packing))
1418 alignment = max(alignment, 16u);
1419
1420 return alignment;
1421 }
1422 else
1423 {
1424 const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1425
1426 // Alignment requirement for scalar block layout is always the alignment for the most basic component.
1427 if (packing_is_scalar(packing))
1428 return base_alignment;
1429
1430 // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
// a vec4. This is handled outside, since that part knows our current offset.
1432 if (type.columns == 1 && packing_is_hlsl(packing))
1433 return base_alignment;
1434
1435 // From 7.6.2.2 in GL 4.5 core spec.
1436 // Rule 1
1437 if (type.vecsize == 1 && type.columns == 1)
1438 return base_alignment;
1439
1440 // Rule 2
1441 if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
1442 return type.vecsize * base_alignment;
1443
1444 // Rule 3
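// A three-component vector is aligned like a four-component vector,
// e.g. a vec3 of 32-bit floats is aligned to 16 bytes, the same as a vec4.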
1445 if (type.vecsize == 3 && type.columns == 1)
1446 return 4 * base_alignment;
1447
1448 // Rule 4 implied. Alignment does not change in std430.
1449
1450 // Rule 5. Column-major matrices are stored as arrays of
1451 // vectors.
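// E.g. a column-major mat2 of floats aligns to 8 bytes in std430 (two-component column),
// but is rounded up to 16 bytes in std140.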
1452 if (flags.get(DecorationColMajor) && type.columns > 1)
1453 {
1454 if (packing_is_vec4_padded(packing))
1455 return 4 * base_alignment;
1456 else if (type.vecsize == 3)
1457 return 4 * base_alignment;
1458 else
1459 return type.vecsize * base_alignment;
1460 }
1461
1462 // Rule 6 implied.
1463
1464 // Rule 7.
1465 if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1466 {
1467 if (packing_is_vec4_padded(packing))
1468 return 4 * base_alignment;
1469 else if (type.columns == 3)
1470 return 4 * base_alignment;
1471 else
1472 return type.columns * base_alignment;
1473 }
1474
1475 // Rule 8 implied.
1476 }
1477
1478 SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
1479 }
1480
uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
                                                   BufferPackingStandard packing)
1483 {
1484 // Array stride is equal to aligned size of the underlying type.
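// For example, an array of float has a stride of 4 in std430, but 16 in std140,
// since std140 rounds array element alignment up to that of a vec4.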
1485 uint32_t parent = type.parent_type;
1486 assert(parent);
1487
1488 auto &tmp = get<SPIRType>(parent);
1489
1490 uint32_t size = type_to_packed_size(tmp, flags, packing);
1491 uint32_t alignment = type_to_packed_alignment(type, flags, packing);
1492 return (size + alignment - 1) & ~(alignment - 1);
1493 }
1494
uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
1496 {
1497 if (!type.array.empty())
1498 {
1499 uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
1500
1501 // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
1502 // so that it is possible to pack other vectors into the last element.
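// E.g. float3 data[4] packs to 3 * 16 + 12 = 60 bytes instead of 64,
// so a following float can be packed into the tail of the last element.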
1503 if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
1504 packed_size -= (4 - type.vecsize) * (type.width / 8);
1505
1506 return packed_size;
1507 }
1508
1509 // If using PhysicalStorageBufferEXT storage class, this is a pointer,
1510 // and is 64-bit.
1511 if (type.storage == StorageClassPhysicalStorageBufferEXT)
1512 {
1513 if (!type.pointer)
1514 SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1515
1516 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1517 return 8;
1518 else
1519 SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1520 }
1521
1522 uint32_t size = 0;
1523
1524 if (type.basetype == SPIRType::Struct)
1525 {
1526 uint32_t pad_alignment = 1;
1527
1528 for (uint32_t i = 0; i < type.member_types.size(); i++)
1529 {
1530 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1531 auto &member_type = get<SPIRType>(type.member_types[i]);
1532
1533 uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
1534 uint32_t alignment = max(packed_alignment, pad_alignment);
1535
1536 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1537 // GL 4.5 spec, 7.6.2.2.
1538 if (member_type.basetype == SPIRType::Struct)
1539 pad_alignment = packed_alignment;
1540 else
1541 pad_alignment = 1;
1542
1543 size = (size + alignment - 1) & ~(alignment - 1);
1544 size += type_to_packed_size(member_type, member_flags, packing);
1545 }
1546 }
1547 else
1548 {
1549 const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1550
1551 if (packing_is_scalar(packing))
1552 {
1553 size = type.vecsize * type.columns * base_alignment;
1554 }
1555 else
1556 {
1557 if (type.columns == 1)
1558 size = type.vecsize * base_alignment;
1559
1560 if (flags.get(DecorationColMajor) && type.columns > 1)
1561 {
1562 if (packing_is_vec4_padded(packing))
1563 size = type.columns * 4 * base_alignment;
1564 else if (type.vecsize == 3)
1565 size = type.columns * 4 * base_alignment;
1566 else
1567 size = type.columns * type.vecsize * base_alignment;
1568 }
1569
1570 if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1571 {
1572 if (packing_is_vec4_padded(packing))
1573 size = type.vecsize * 4 * base_alignment;
1574 else if (type.columns == 3)
1575 size = type.vecsize * 4 * base_alignment;
1576 else
1577 size = type.vecsize * type.columns * base_alignment;
1578 }
1579
1580 // For matrices in HLSL, the last element has a size which depends on its vector size,
1581 // so that it is possible to pack other vectors into the last element.
1582 if (packing_is_hlsl(packing) && type.columns > 1)
1583 size -= (4 - type.vecsize) * (type.width / 8);
1584 }
1585 }
1586
1587 return size;
1588 }
1589
bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
                                              uint32_t *failed_validation_index, uint32_t start_offset,
                                              uint32_t end_offset)
1593 {
1594 // This is very tricky and error prone, but try to be exhaustive and correct here.
1595 // SPIR-V doesn't directly say if we're using std430 or std140.
1596 // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
1597 // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
1598 // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
1599 //
1600 // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
1601 // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
1602 //
1603 // The only two differences between std140 and std430 are related to padding alignment/array stride
1604 // in arrays and structs. In std140 they take minimum vec4 alignment.
1605 // std430 only removes the vec4 requirement.
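// For example, an ArrayStride of 16 on an array of float matches std140,
// while a stride of 4 matches std430 (or scalar) layout.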
1606
1607 uint32_t offset = 0;
1608 uint32_t pad_alignment = 1;
1609
1610 bool is_top_level_block =
1611 has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1612
1613 for (uint32_t i = 0; i < type.member_types.size(); i++)
1614 {
1615 auto &memb_type = get<SPIRType>(type.member_types[i]);
1616 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1617
1618 // Verify alignment rules.
1619 uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
1620
1621 // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
1622 // layout(constant_id = 0) const int s = 10;
1623 // const int S = s + 5; // SpecConstantOp
1624 // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
1625 // we would need full implementation of compile-time constant folding. :(
1626 // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
1627 // for our analysis (e.g. unsized arrays).
1628 // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
1629 // Querying size of this member will fail, so just don't call it unless we have to.
1630 //
1631 // This is likely "best effort" we can support without going into unacceptably complicated workarounds.
1632 bool member_can_be_unsized =
1633 is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
1634
1635 uint32_t packed_size = 0;
1636 if (!member_can_be_unsized || packing_is_hlsl(packing))
1637 packed_size = type_to_packed_size(memb_type, member_flags, packing);
1638
1639 // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
1640 if (packing_is_hlsl(packing))
1641 {
1642 // If a member straddles across a vec4 boundary, alignment is actually vec4.
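// E.g. a float2 at offset 12 would occupy bytes 12 through 19 and cross a 16 byte boundary,
// so HLSL packing requires it to start at offset 16 instead.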
1643 uint32_t begin_word = offset / 16;
1644 uint32_t end_word = (offset + packed_size - 1) / 16;
1645 if (begin_word != end_word)
1646 packed_alignment = max(packed_alignment, 16u);
1647 }
1648
1649 uint32_t actual_offset = type_struct_member_offset(type, i);
1650 // Field is not in the specified range anymore and we can ignore any further fields.
1651 if (actual_offset >= end_offset)
1652 break;
1653
1654 uint32_t alignment = max(packed_alignment, pad_alignment);
1655 offset = (offset + alignment - 1) & ~(alignment - 1);
1656
1657 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1658 // GL 4.5 spec, 7.6.2.2.
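		// Illustrative example (assumed declarations): given "struct S { float x; }; S s; float y;",
		// std140 gives S a base alignment of 16, so y is aligned to 16 rather than to 4.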
1659 if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
1660 pad_alignment = packed_alignment;
1661 else
1662 pad_alignment = 1;
1663
1664 // Only care about packing if we are in the given range
1665 if (actual_offset >= start_offset)
1666 {
1667 // We only care about offsets in std140, std430, etc ...
1668 // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
1669 if (!packing_has_flexible_offset(packing))
1670 {
1671 if (actual_offset != offset) // This cannot be the packing we're looking for.
1672 {
1673 if (failed_validation_index)
1674 *failed_validation_index = i;
1675 return false;
1676 }
1677 }
1678 else if ((actual_offset & (alignment - 1)) != 0)
1679 {
1680 // We still need to verify that alignment rules are observed, even if we have explicit offset.
1681 if (failed_validation_index)
1682 *failed_validation_index = i;
1683 return false;
1684 }
1685
1686 // Verify array stride rules.
1687 if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
1688 type_struct_member_array_stride(type, i))
1689 {
1690 if (failed_validation_index)
1691 *failed_validation_index = i;
1692 return false;
1693 }
1694
1695 // Verify that sub-structs also follow packing rules.
1696 // We cannot use enhanced layouts on substructs, so they better be up to spec.
1697 auto substruct_packing = packing_to_substruct_packing(packing);
1698
1699 if (!memb_type.pointer && !memb_type.member_types.empty() &&
1700 !buffer_is_packing_standard(memb_type, substruct_packing))
1701 {
1702 if (failed_validation_index)
1703 *failed_validation_index = i;
1704 return false;
1705 }
1706 }
1707
1708 // Bump size.
1709 offset = actual_offset + packed_size;
1710 }
1711
1712 return true;
1713 }
1714
1715 bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
1716 {
1717 	// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
1718 // Be very explicit here about how to solve the issue.
1719 if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1720 (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1721 {
1722 uint32_t minimum_desktop_version = block ? 440 : 410;
1723 // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
1724
1725 if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1726 return false;
1727 else if (options.es && options.version < 310)
1728 return false;
1729 }
1730
1731 if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1732 (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1733 {
1734 if (options.es && options.version < 300)
1735 return false;
1736 else if (!options.es && options.version < 330)
1737 return false;
1738 }
1739
1740 if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1741 {
1742 if (options.es && options.version < 310)
1743 return false;
1744 else if (!options.es && options.version < 430)
1745 return false;
1746 }
1747
1748 return true;
1749 }
1750
1751 string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
1752 {
1753 	// FIXME: Come up with a better solution for when to disable layouts.
1754 	// Whether layouts can be used depends on extensions as well as which kinds
1755 	// of layouts are involved. For now, the simple solution is to just disable
1756 	// layouts for legacy versions.
1757 if (is_legacy())
1758 return "";
1759
1760 if (subpass_input_is_framebuffer_fetch(var.self))
1761 return "";
1762
1763 SmallVector<string> attr;
1764
1765 auto &type = get<SPIRType>(var.basetype);
1766 auto &flags = get_decoration_bitset(var.self);
1767 auto &typeflags = get_decoration_bitset(type.self);
1768
1769 if (flags.get(DecorationPassthroughNV))
1770 attr.push_back("passthrough");
1771
1772 if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
1773 attr.push_back("push_constant");
1774 else if (var.storage == StorageClassShaderRecordBufferKHR)
1775 attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
1776
1777 if (flags.get(DecorationRowMajor))
1778 attr.push_back("row_major");
1779 if (flags.get(DecorationColMajor))
1780 attr.push_back("column_major");
1781
1782 if (options.vulkan_semantics)
1783 {
1784 if (flags.get(DecorationInputAttachmentIndex))
1785 attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
1786 }
1787
1788 bool is_block = has_decoration(type.self, DecorationBlock);
1789 if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
1790 {
1791 Bitset combined_decoration;
1792 for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
1793 combined_decoration.merge_or(combined_decoration_for_member(type, i));
1794
1795 // If our members have location decorations, we don't need to
1796 // emit location decorations at the top as well (looks weird).
1797 if (!combined_decoration.get(DecorationLocation))
1798 attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
1799 }
1800
1801 if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput &&
1802 location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation)))
1803 {
1804 attr.push_back("noncoherent");
1805 }
1806
1807 // Transform feedback
1808 bool uses_enhanced_layouts = false;
1809 if (is_block && var.storage == StorageClassOutput)
1810 {
1811 // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
1812 // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
1813 // is the xfb_offset.
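		// As an illustration only (names and values assumed), this path ends up producing something like:
		//   layout(xfb_buffer = 0, xfb_stride = 32) out VertexOut { ... };
		// while the per-member xfb_offset qualifiers are emitted when the individual members are declared.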
1814 uint32_t member_count = uint32_t(type.member_types.size());
1815 bool have_xfb_buffer_stride = false;
1816 bool have_any_xfb_offset = false;
1817 bool have_geom_stream = false;
1818 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
1819
1820 if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
1821 {
1822 have_xfb_buffer_stride = true;
1823 xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
1824 xfb_stride = get_decoration(var.self, DecorationXfbStride);
1825 }
1826
1827 if (flags.get(DecorationStream))
1828 {
1829 have_geom_stream = true;
1830 geom_stream = get_decoration(var.self, DecorationStream);
1831 }
1832
1833 // Verify that none of the members violate our assumption.
1834 for (uint32_t i = 0; i < member_count; i++)
1835 {
1836 if (has_member_decoration(type.self, i, DecorationStream))
1837 {
1838 uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
1839 if (have_geom_stream && member_geom_stream != geom_stream)
1840 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
1841 have_geom_stream = true;
1842 geom_stream = member_geom_stream;
1843 }
1844
1845 // Only members with an Offset decoration participate in XFB.
1846 if (!has_member_decoration(type.self, i, DecorationOffset))
1847 continue;
1848 have_any_xfb_offset = true;
1849
1850 if (has_member_decoration(type.self, i, DecorationXfbBuffer))
1851 {
1852 uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
1853 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
1854 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
1855 have_xfb_buffer_stride = true;
1856 xfb_buffer = buffer_index;
1857 }
1858
1859 if (has_member_decoration(type.self, i, DecorationXfbStride))
1860 {
1861 uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
1862 if (have_xfb_buffer_stride && stride != xfb_stride)
1863 SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
1864 have_xfb_buffer_stride = true;
1865 xfb_stride = stride;
1866 }
1867 }
1868
1869 if (have_xfb_buffer_stride && have_any_xfb_offset)
1870 {
1871 attr.push_back(join("xfb_buffer = ", xfb_buffer));
1872 attr.push_back(join("xfb_stride = ", xfb_stride));
1873 uses_enhanced_layouts = true;
1874 }
1875
1876 if (have_geom_stream)
1877 {
1878 if (get_execution_model() != ExecutionModelGeometry)
1879 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1880 if (options.es)
1881 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1882 if (options.version < 400)
1883 require_extension_internal("GL_ARB_transform_feedback3");
1884 attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
1885 }
1886 }
1887 else if (var.storage == StorageClassOutput)
1888 {
1889 if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
1890 {
1891 // XFB for standalone variables, we can emit all decorations.
1892 attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
1893 attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
1894 attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
1895 uses_enhanced_layouts = true;
1896 }
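		// The block above corresponds to output along the lines of (illustrative, assumed values):
		//   layout(xfb_buffer = 1, xfb_stride = 16, xfb_offset = 0) out vec4 vColor;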
1897
1898 if (flags.get(DecorationStream))
1899 {
1900 if (get_execution_model() != ExecutionModelGeometry)
1901 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1902 if (options.es)
1903 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1904 if (options.version < 400)
1905 require_extension_internal("GL_ARB_transform_feedback3");
1906 attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
1907 }
1908 }
1909
1910 // Can only declare Component if we can declare location.
1911 if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
1912 {
1913 uses_enhanced_layouts = true;
1914 attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
1915 }
1916
1917 if (uses_enhanced_layouts)
1918 {
1919 if (!options.es)
1920 {
1921 			if (options.version < 140)
1922 				SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
1923 			else if (options.version < 440)
1924 				require_extension_internal("GL_ARB_enhanced_layouts");
1927 }
1928 		else
1929 SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
1930 }
1931
1932 if (flags.get(DecorationIndex))
1933 attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
1934
1935 // Do not emit set = decoration in regular GLSL output, but
1936 // we need to preserve it in Vulkan GLSL mode.
1937 if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
1938 {
1939 if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
1940 attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
1941 }
1942
1943 bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
1944 bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
1945 (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
1946 bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
1947 bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
1948
1949 // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
1950 bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
1951
1952 // pretend no UBOs when options say so
1953 if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
1954 can_use_buffer_blocks = false;
1955
1956 bool can_use_binding;
1957 if (options.es)
1958 can_use_binding = options.version >= 310;
1959 else
1960 can_use_binding = options.enable_420pack_extension || (options.version >= 420);
1961
1962 // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
1963 if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
1964 can_use_binding = false;
1965
1966 if (var.storage == StorageClassShaderRecordBufferKHR)
1967 can_use_binding = false;
1968
1969 if (can_use_binding && flags.get(DecorationBinding))
1970 attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));
1971
1972 if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
1973 attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
1974
1975 // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
1976 // If SPIR-V does not comply with either layout, we cannot really work around it.
1977 if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
1978 {
1979 attr.push_back(buffer_to_packing_standard(type, false));
1980 }
1981 else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
1982 {
1983 attr.push_back(buffer_to_packing_standard(type, true));
1984 }
1985
1986 	// For images, the type itself adds a layout qualifier.
1987 // Only emit the format for storage images.
1988 if (type.basetype == SPIRType::Image && type.image.sampled == 2)
1989 {
1990 const char *fmt = format_to_glsl(type.image.format);
1991 if (fmt)
1992 attr.push_back(fmt);
1993 }
1994
1995 if (attr.empty())
1996 return "";
1997
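	// At this point attr holds the individual qualifiers, e.g. { "set = 0", "binding = 1", "std140" }
	// (illustrative values), which merge() joins into a single prefix such as "layout(set = 0, binding = 1, std140) ".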
1998 string res = "layout(";
1999 res += merge(attr);
2000 res += ") ";
2001 return res;
2002 }
2003
2004 string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
2005 {
2006 if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
2007 return "std430";
2008 else if (buffer_is_packing_standard(type, BufferPackingStd140))
2009 return "std140";
2010 else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
2011 {
2012 require_extension_internal("GL_EXT_scalar_block_layout");
2013 return "scalar";
2014 }
2015 else if (support_std430_without_scalar_layout &&
2016 buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
2017 {
2018 if (options.es && !options.vulkan_semantics)
2019 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
2020 			                  "not support GL_ARB_enhanced_layouts.");
2021 if (!options.es && !options.vulkan_semantics && options.version < 440)
2022 require_extension_internal("GL_ARB_enhanced_layouts");
2023
2024 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2025 return "std430";
2026 }
2027 else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
2028 {
2029 		// Fallback time. We might be able to use GL_ARB_enhanced_layouts to deal with this difference,
2030 		// however, we can only use layout(offset) on the block itself, not on substructs, so the substructs had better already use a compliant layout.
2031 // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
2032 if (options.es && !options.vulkan_semantics)
2033 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
2034 			                  "not support GL_ARB_enhanced_layouts.");
2035 if (!options.es && !options.vulkan_semantics && options.version < 440)
2036 require_extension_internal("GL_ARB_enhanced_layouts");
2037
2038 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2039 return "std140";
2040 }
2041 else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
2042 {
2043 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2044 require_extension_internal("GL_EXT_scalar_block_layout");
2045 return "scalar";
2046 }
2047 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2048 buffer_is_packing_standard(type, BufferPackingStd430))
2049 {
2050 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2051 require_extension_internal("GL_EXT_scalar_block_layout");
2052 return "std430";
2053 }
2054 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2055 buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
2056 {
2057 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2058 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2059 require_extension_internal("GL_EXT_scalar_block_layout");
2060 return "std430";
2061 }
2062 else
2063 {
2064 		SPIRV_CROSS_THROW("Buffer block cannot be expressed as std430, std140 or scalar, even with enhanced "
2065 		                  "layouts. You can try flattening this block to support a more flexible layout.");
2066 }
2067 }
2068
2069 void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
2070 {
2071 if (flattened_buffer_blocks.count(var.self))
2072 emit_buffer_block_flattened(var);
2073 else if (options.vulkan_semantics)
2074 emit_push_constant_block_vulkan(var);
2075 else if (options.emit_push_constant_as_uniform_buffer)
2076 emit_buffer_block_native(var);
2077 else
2078 emit_push_constant_block_glsl(var);
2079 }
2080
2081 void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
2082 {
2083 emit_buffer_block(var);
2084 }
2085
2086 void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
2087 {
2088 // OpenGL has no concept of push constant blocks, implement it as a uniform struct.
2089 auto &type = get<SPIRType>(var.basetype);
2090
2091 auto &flags = ir.meta[var.self].decoration.decoration_flags;
2092 flags.clear(DecorationBinding);
2093 flags.clear(DecorationDescriptorSet);
2094
2095 #if 0
2096 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
2097 SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
2098 "Remap to location with reflection API first or disable these decorations.");
2099 #endif
2100
2101 // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
2102 // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
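	// Sketch of the resulting GLSL (names are illustrative, not produced verbatim by this function):
	//   struct PushConsts { mat4 mvp; };
	//   uniform PushConsts pc;
	// i.e. a plain uniform struct instead of a push_constant block, which regular GLSL cannot express.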
2103 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2104 bool block_flag = block_flags.get(DecorationBlock);
2105 block_flags.clear(DecorationBlock);
2106
2107 emit_struct(type);
2108
2109 if (block_flag)
2110 block_flags.set(DecorationBlock);
2111
2112 emit_uniform(var);
2113 statement("");
2114 }
2115
2116 void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
2117 {
2118 auto &type = get<SPIRType>(var.basetype);
2119 bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
2120
2121 if (flattened_buffer_blocks.count(var.self))
2122 emit_buffer_block_flattened(var);
2123 else if (is_legacy() || (!options.es && options.version == 130) ||
2124 (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
2125 emit_buffer_block_legacy(var);
2126 else
2127 emit_buffer_block_native(var);
2128 }
2129
2130 void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
2131 {
2132 auto &type = get<SPIRType>(var.basetype);
2133 bool ssbo = var.storage == StorageClassStorageBuffer ||
2134 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2135 if (ssbo)
2136 SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
2137
2138 // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
2139 // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2140 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2141 bool block_flag = block_flags.get(DecorationBlock);
2142 block_flags.clear(DecorationBlock);
2143 emit_struct(type);
2144 if (block_flag)
2145 block_flags.set(DecorationBlock);
2146 emit_uniform(var);
2147 statement("");
2148 }
2149
2150 void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration)
2151 {
2152 string buffer_name;
2153
2154 if (forward_declaration)
2155 {
2156 		// Block names should never alias, but with HLSL input they effectively can, because block types are reused for UAVs ...
2157 		// Allow an aliased name here since we might be declaring the block twice: once as a forward-declared buffer reference
2158 		// and once as the proper declaration. The names must match up.
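		// Illustrative shape of the output (block name assumed): the forward declaration emits
		//   layout(buffer_reference) buffer Node;
		// and the later full declaration reuses the exact same "Node" name with its members and packing layout.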
2159 buffer_name = to_name(type.self, false);
2160
2161 // Shaders never use the block by interface name, so we don't
2162 // have to track this other than updating name caches.
2163 // If we have a collision for any reason, just fallback immediately.
2164 if (ir.meta[type.self].decoration.alias.empty() ||
2165 block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
2166 resource_names.find(buffer_name) != end(resource_names))
2167 {
2168 buffer_name = join("_", type.self);
2169 }
2170
2171 // Make sure we get something unique for both global name scope and block name scope.
2172 // See GLSL 4.5 spec: section 4.3.9 for details.
2173 add_variable(block_ssbo_names, resource_names, buffer_name);
2174
2175 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2176 // This cannot conflict with anything else, so we're safe now.
2177 		// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2178 if (buffer_name.empty())
2179 buffer_name = join("_", type.self);
2180
2181 block_names.insert(buffer_name);
2182 block_ssbo_names.insert(buffer_name);
2183
2184 // Ensure we emit the correct name when emitting non-forward pointer type.
2185 ir.meta[type.self].decoration.alias = buffer_name;
2186 }
2187 else if (type.basetype != SPIRType::Struct)
2188 buffer_name = type_to_glsl(type);
2189 else
2190 buffer_name = to_name(type.self, false);
2191
2192 if (!forward_declaration)
2193 {
2194 if (type.basetype == SPIRType::Struct)
2195 {
2196 auto flags = ir.get_buffer_block_type_flags(type);
2197 string decorations;
2198 if (flags.get(DecorationRestrict))
2199 decorations += " restrict";
2200 if (flags.get(DecorationCoherent))
2201 decorations += " coherent";
2202 if (flags.get(DecorationNonReadable))
2203 decorations += " writeonly";
2204 if (flags.get(DecorationNonWritable))
2205 decorations += " readonly";
2206 statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true),
2207 ")", decorations, " buffer ", buffer_name);
2208 }
2209 else
2210 statement("layout(buffer_reference) buffer ", buffer_name);
2211
2212 begin_scope();
2213
2214 if (type.basetype == SPIRType::Struct)
2215 {
2216 type.member_name_cache.clear();
2217
2218 uint32_t i = 0;
2219 for (auto &member : type.member_types)
2220 {
2221 add_member_name(type, i);
2222 emit_struct_member(type, member, i);
2223 i++;
2224 }
2225 }
2226 else
2227 {
2228 auto &pointee_type = get_pointee_type(type);
2229 statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
2230 }
2231
2232 end_scope_decl();
2233 statement("");
2234 }
2235 else
2236 {
2237 statement("layout(buffer_reference) buffer ", buffer_name, ";");
2238 }
2239 }
2240
2241 void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
2242 {
2243 auto &type = get<SPIRType>(var.basetype);
2244
2245 Bitset flags = ir.get_buffer_block_flags(var);
2246 bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2247 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2248 bool is_restrict = ssbo && flags.get(DecorationRestrict);
2249 bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
2250 bool is_readonly = ssbo && flags.get(DecorationNonWritable);
2251 bool is_coherent = ssbo && flags.get(DecorationCoherent);
2252
2253 	// Block names should never alias, but with HLSL input they effectively can, because block types are reused for UAVs ...
2254 auto buffer_name = to_name(type.self, false);
2255
2256 auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
2257
2258 // Shaders never use the block by interface name, so we don't
2259 // have to track this other than updating name caches.
2260 // If we have a collision for any reason, just fallback immediately.
2261 if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
2262 resource_names.find(buffer_name) != end(resource_names))
2263 {
2264 buffer_name = get_block_fallback_name(var.self);
2265 }
2266
2267 // Make sure we get something unique for both global name scope and block name scope.
2268 // See GLSL 4.5 spec: section 4.3.9 for details.
2269 add_variable(block_namespace, resource_names, buffer_name);
2270
2271 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2272 // This cannot conflict with anything else, so we're safe now.
2273 	// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2274 if (buffer_name.empty())
2275 buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2276
2277 block_names.insert(buffer_name);
2278 block_namespace.insert(buffer_name);
2279
2280 // Save for post-reflection later.
2281 declared_block_names[var.self] = buffer_name;
2282
2283 statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
2284 is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
2285 buffer_name);
2286
2287 begin_scope();
2288
2289 type.member_name_cache.clear();
2290
2291 uint32_t i = 0;
2292 for (auto &member : type.member_types)
2293 {
2294 add_member_name(type, i);
2295 emit_struct_member(type, member, i);
2296 i++;
2297 }
2298
2299 // var.self can be used as a backup name for the block name,
2300 // so we need to make sure we don't disturb the name here on a recompile.
2301 // It will need to be reset if we have to recompile.
2302 preserve_alias_on_reset(var.self);
2303 add_resource_name(var.self);
2304 end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
2305 statement("");
2306 }
2307
2308 void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
2309 {
2310 auto &type = get<SPIRType>(var.basetype);
2311
2312 // Block names should never alias.
2313 auto buffer_name = to_name(type.self, false);
2314 size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
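	// get_declared_struct_size() returns bytes; rounding up by 16 yields the number of vec4-sized array
	// elements needed for the flattened uniform. E.g. (illustrative) a 36-byte block flattens to 3 elements.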
2315
2316 SPIRType::BaseType basic_type;
2317 if (get_common_basic_type(type, basic_type))
2318 {
2319 SPIRType tmp;
2320 tmp.basetype = basic_type;
2321 tmp.vecsize = 4;
2322 if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
2323 SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
2324
2325 auto flags = ir.get_buffer_block_flags(var);
2326 statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
2327 buffer_size, "];");
2328 }
2329 else
2330 SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
2331 }
2332
2333 const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
2334 {
2335 auto &execution = get_entry_point();
2336
2337 if (subpass_input_is_framebuffer_fetch(var.self))
2338 return "";
2339
2340 if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
2341 {
2342 if (is_legacy() && execution.model == ExecutionModelVertex)
2343 return var.storage == StorageClassInput ? "attribute " : "varying ";
2344 else if (is_legacy() && execution.model == ExecutionModelFragment)
2345 return "varying "; // Fragment outputs are renamed so they never hit this case.
2346 else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
2347 {
2348 uint32_t loc = get_decoration(var.self, DecorationLocation);
2349 bool is_inout = location_is_framebuffer_fetch(loc);
2350 if (is_inout)
2351 return "inout ";
2352 else
2353 return "out ";
2354 }
2355 else
2356 return var.storage == StorageClassInput ? "in " : "out ";
2357 }
2358 else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
2359 var.storage == StorageClassPushConstant)
2360 {
2361 return "uniform ";
2362 }
2363 else if (var.storage == StorageClassRayPayloadKHR)
2364 {
2365 return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
2366 }
2367 else if (var.storage == StorageClassIncomingRayPayloadKHR)
2368 {
2369 return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
2370 }
2371 else if (var.storage == StorageClassHitAttributeKHR)
2372 {
2373 return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
2374 }
2375 else if (var.storage == StorageClassCallableDataKHR)
2376 {
2377 return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
2378 }
2379 else if (var.storage == StorageClassIncomingCallableDataKHR)
2380 {
2381 return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
2382 }
2383
2384 return "";
2385 }
2386
2387 void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
2388 const SmallVector<uint32_t> &indices)
2389 {
2390 uint32_t member_type_id = type.self;
2391 const SPIRType *member_type = &type;
2392 const SPIRType *parent_type = nullptr;
2393 auto flattened_name = basename;
2394 for (auto &index : indices)
2395 {
2396 flattened_name += "_";
2397 flattened_name += to_member_name(*member_type, index);
2398 parent_type = member_type;
2399 member_type_id = member_type->member_types[index];
2400 member_type = &get<SPIRType>(member_type_id);
2401 }
2402
2403 assert(member_type->basetype != SPIRType::Struct);
2404
2405 // We're overriding struct member names, so ensure we do so on the primary type.
2406 if (parent_type->type_alias)
2407 parent_type = &get<SPIRType>(parent_type->type_alias);
2408
2409 // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
2410 // which is not allowed.
2411 ParsedIR::sanitize_underscores(flattened_name);
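	// Example (names assumed for illustration): basename "VSOut" with the member chain "inner" -> "color"
	// yields the flattened varying name "VSOut_inner_color" after underscore sanitization.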
2412
2413 uint32_t last_index = indices.back();
2414
2415 // Pass in the varying qualifier here so it will appear in the correct declaration order.
2416 // Replace member name while emitting it so it encodes both struct name and member name.
2417 auto backup_name = get_member_name(parent_type->self, last_index);
2418 auto member_name = to_member_name(*parent_type, last_index);
2419 set_member_name(parent_type->self, last_index, flattened_name);
2420 emit_struct_member(*parent_type, member_type_id, last_index, qual);
2421 // Restore member name.
2422 set_member_name(parent_type->self, last_index, member_name);
2423 }
2424
2425 void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
2426 const SmallVector<uint32_t> &indices)
2427 {
2428 auto sub_indices = indices;
2429 sub_indices.push_back(0);
2430
2431 const SPIRType *member_type = &type;
2432 for (auto &index : indices)
2433 member_type = &get<SPIRType>(member_type->member_types[index]);
2434
2435 assert(member_type->basetype == SPIRType::Struct);
2436
2437 if (!member_type->array.empty())
2438 SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
2439
2440 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
2441 {
2442 sub_indices.back() = i;
2443 if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
2444 emit_flattened_io_block_struct(basename, type, qual, sub_indices);
2445 else
2446 emit_flattened_io_block_member(basename, type, qual, sub_indices);
2447 }
2448 }
2449
2450 void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
2451 {
2452 auto &var_type = get<SPIRType>(var.basetype);
2453 if (!var_type.array.empty())
2454 SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
2455
2456 // Emit flattened types based on the type alias. Normally, we are never supposed to emit
2457 // struct declarations for aliased types.
2458 auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
2459
2460 auto old_flags = ir.meta[type.self].decoration.decoration_flags;
2461 // Emit the members as if they are part of a block to get all qualifiers.
2462 ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
2463
2464 type.member_name_cache.clear();
2465
2466 SmallVector<uint32_t> member_indices;
2467 member_indices.push_back(0);
2468 auto basename = to_name(var.self);
2469
2470 uint32_t i = 0;
2471 for (auto &member : type.member_types)
2472 {
2473 add_member_name(type, i);
2474 auto &membertype = get<SPIRType>(member);
2475
2476 member_indices.back() = i;
2477 if (membertype.basetype == SPIRType::Struct)
2478 emit_flattened_io_block_struct(basename, type, qual, member_indices);
2479 else
2480 emit_flattened_io_block_member(basename, type, qual, member_indices);
2481 i++;
2482 }
2483
2484 ir.meta[type.self].decoration.decoration_flags = old_flags;
2485
2486 // Treat this variable as fully flattened from now on.
2487 flattened_structs[var.self] = true;
2488 }
2489
2490 void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
2491 {
2492 auto &type = get<SPIRType>(var.basetype);
2493
2494 if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
2495 !options.es && options.version < 410)
2496 {
2497 require_extension_internal("GL_ARB_vertex_attrib_64bit");
2498 }
2499
2500 // Either make it plain in/out or in/out blocks depending on what shader is doing ...
2501 bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
2502 const char *qual = to_storage_qualifiers_glsl(var);
2503
2504 if (block)
2505 {
2506 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2507 // I/O variables which are struct types.
2508 // To support this, flatten the struct into separate varyings instead.
2509 if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2510 (!options.es && options.version < 150))
2511 {
2512 // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
2513 // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
2514 emit_flattened_io_block(var, qual);
2515 }
2516 else
2517 {
2518 if (options.es && options.version < 320)
2519 {
2520 // Geometry and tessellation extensions imply this extension.
2521 if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
2522 require_extension_internal("GL_EXT_shader_io_blocks");
2523 }
2524
2525 // Workaround to make sure we can emit "patch in/out" correctly.
2526 fixup_io_block_patch_qualifiers(var);
2527
2528 // Block names should never alias.
2529 auto block_name = to_name(type.self, false);
2530
2531 // The namespace for I/O blocks is separate from other variables in GLSL.
2532 auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
2533
2534 // Shaders never use the block by interface name, so we don't
2535 // have to track this other than updating name caches.
2536 if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
2537 block_name = get_fallback_name(type.self);
2538 else
2539 block_namespace.insert(block_name);
2540
2541 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2542 // This cannot conflict with anything else, so we're safe now.
2543 if (block_name.empty())
2544 block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2545
2546 // Instance names cannot alias block names.
2547 resource_names.insert(block_name);
2548
2549 bool is_patch = has_decoration(var.self, DecorationPatch);
2550 statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name);
2551 begin_scope();
2552
2553 type.member_name_cache.clear();
2554
2555 uint32_t i = 0;
2556 for (auto &member : type.member_types)
2557 {
2558 add_member_name(type, i);
2559 emit_struct_member(type, member, i);
2560 i++;
2561 }
2562
2563 add_resource_name(var.self);
2564 end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
2565 statement("");
2566 }
2567 }
2568 else
2569 {
2570 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2571 // I/O variables which are struct types.
2572 // To support this, flatten the struct into separate varyings instead.
2573 if (type.basetype == SPIRType::Struct &&
2574 (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2575 (!options.es && options.version < 150)))
2576 {
2577 emit_flattened_io_block(var, qual);
2578 }
2579 else
2580 {
2581 add_resource_name(var.self);
2582
2583 // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
2584 // Opt for unsized as it's the more "correct" variant to use.
2585 bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() &&
2586 !has_decoration(var.self, DecorationPatch) &&
2587 (get_entry_point().model == ExecutionModelTessellationControl ||
2588 get_entry_point().model == ExecutionModelTessellationEvaluation);
2589
2590 uint32_t old_array_size = 0;
2591 bool old_array_size_literal = true;
2592
2593 if (control_point_input_array)
2594 {
2595 swap(type.array.back(), old_array_size);
2596 swap(type.array_size_literal.back(), old_array_size_literal);
2597 }
2598
2599 statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
2600 variable_decl(type, to_name(var.self), var.self), ";");
2601
2602 if (control_point_input_array)
2603 {
2604 swap(type.array.back(), old_array_size);
2605 swap(type.array_size_literal.back(), old_array_size_literal);
2606 }
2607 }
2608 }
2609 }
2610
2611 void CompilerGLSL::emit_uniform(const SPIRVariable &var)
2612 {
2613 auto &type = get<SPIRType>(var.basetype);
2614 if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
2615 {
2616 if (!options.es && options.version < 420)
2617 require_extension_internal("GL_ARB_shader_image_load_store");
2618 else if (options.es && options.version < 310)
2619 SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
2620 }
2621
2622 add_resource_name(var.self);
2623 statement(layout_for_variable(var), variable_decl(var), ";");
2624 }
2625
2626 string CompilerGLSL::constant_value_macro_name(uint32_t id)
2627 {
2628 return join("SPIRV_CROSS_CONSTANT_ID_", id);
2629 }
2630
2631 void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
2632 {
2633 auto &type = get<SPIRType>(constant.basetype);
2634 auto name = to_name(constant.self);
2635 statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
2636 }
2637
2638 void CompilerGLSL::emit_constant(const SPIRConstant &constant)
2639 {
2640 auto &type = get<SPIRType>(constant.constant_type);
2641 auto name = to_name(constant.self);
2642
2643 SpecializationConstant wg_x, wg_y, wg_z;
2644 ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
2645
2646 // This specialization constant is implicitly declared by emitting layout() in;
2647 if (constant.self == workgroup_size_id)
2648 return;
2649
2650 // These specialization constants are implicitly declared by emitting layout() in;
2651 // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
2652 // later can use macro overrides for work group size.
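	// In the non-Vulkan path below, such a constant surfaces as an overridable macro, e.g. (illustrative):
	//   #ifndef SPIRV_CROSS_CONSTANT_ID_0
	//   #define SPIRV_CROSS_CONSTANT_ID_0 64
	//   #endif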
2653 bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
2654 ConstantID(constant.self) == wg_z.id;
2655
2656 if (options.vulkan_semantics && is_workgroup_size_constant)
2657 {
2658 // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
2659 return;
2660 }
2661 else if (!options.vulkan_semantics && is_workgroup_size_constant &&
2662 !has_decoration(constant.self, DecorationSpecId))
2663 {
2664 // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
2665 return;
2666 }
2667
2668 // Only scalars have constant IDs.
2669 if (has_decoration(constant.self, DecorationSpecId))
2670 {
2671 if (options.vulkan_semantics)
2672 {
2673 statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
2674 variable_decl(type, name), " = ", constant_expression(constant), ";");
2675 }
2676 else
2677 {
2678 			const string &macro_name = constant.specialization_constant_macro_name;
2679 statement("#ifndef ", macro_name);
2680 statement("#define ", macro_name, " ", constant_expression(constant));
2681 statement("#endif");
2682
2683 // For workgroup size constants, only emit the macros.
2684 if (!is_workgroup_size_constant)
2685 statement("const ", variable_decl(type, name), " = ", macro_name, ";");
2686 }
2687 }
2688 else
2689 {
2690 statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
2691 }
2692 }
2693
2694 void CompilerGLSL::emit_entry_point_declarations()
2695 {
2696 }
2697
2698 void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
2699 {
2700 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
2701 if (is_hidden_variable(var))
2702 return;
2703
2704 auto *meta = ir.find_meta(var.self);
2705 if (!meta)
2706 return;
2707
2708 auto &m = meta->decoration;
2709 if (keywords.find(m.alias) != end(keywords))
2710 m.alias = join("_", m.alias);
2711 });
2712
2713 ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
2714 auto *meta = ir.find_meta(func.self);
2715 if (!meta)
2716 return;
2717
2718 auto &m = meta->decoration;
2719 if (keywords.find(m.alias) != end(keywords))
2720 m.alias = join("_", m.alias);
2721 });
2722
2723 ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
2724 auto *meta = ir.find_meta(type.self);
2725 if (!meta)
2726 return;
2727
2728 auto &m = meta->decoration;
2729 if (keywords.find(m.alias) != end(keywords))
2730 m.alias = join("_", m.alias);
2731
2732 for (auto &memb : meta->members)
2733 if (keywords.find(memb.alias) != end(keywords))
2734 memb.alias = join("_", memb.alias);
2735 });
2736 }
2737
2738 void CompilerGLSL::replace_illegal_names()
2739 {
2740 // clang-format off
2741 static const unordered_set<string> keywords = {
2742 "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
2743 "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
2744 "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
2745 "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
2746 "ceil", "cos", "cosh", "cross", "degrees",
2747 "dFdx", "dFdxCoarse", "dFdxFine",
2748 "dFdy", "dFdyCoarse", "dFdyFine",
2749 "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
2750 "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
2751 "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
2752 "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
2753 "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
2754 "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
2755 "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
2756 "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
2757 "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
2758 "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
2759 "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
2760 "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
2761 "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
2762 "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
2763 "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
2764 "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
2765 "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
2766
2767 "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
2768 "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
2769 "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
2770 "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
2771 "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
2772 "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
2773 "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
2774 "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
2775 "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
2776 "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
2777 "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
2778 "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
2779 "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
2780 "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
2781 "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
2782 "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
2783 "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
2784 "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
2785 "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
2786 "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
2787 "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
2788 "while", "writeonly",
2789 };
2790 // clang-format on
2791
2792 replace_illegal_names(keywords);
2793 }
2794
2795 void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
2796 {
2797 auto &m = ir.meta[var.self].decoration;
2798 uint32_t location = 0;
2799 if (m.decoration_flags.get(DecorationLocation))
2800 location = m.location;
2801
2802 	// If our variable is arrayed, we must not emit the array part here, since the SPIR-V
2803 	// access chain will handle that indexing for us.
2804 auto &type = get<SPIRType>(var.basetype);
2805
2806 if (type.array.empty())
2807 {
2808 // Redirect the write to a specific render target in legacy GLSL.
2809 m.alias = join("gl_FragData[", location, "]");
2810
2811 if (is_legacy_es() && location != 0)
2812 require_extension_internal("GL_EXT_draw_buffers");
2813 }
2814 else if (type.array.size() == 1)
2815 {
2816 // If location is non-zero, we probably have to add an offset.
2817 // This gets really tricky since we'd have to inject an offset in the access chain.
2818 // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
2819 m.alias = "gl_FragData";
2820 if (location != 0)
2821 SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
2822 "This is unimplemented in SPIRV-Cross.");
2823
2824 if (is_legacy_es())
2825 require_extension_internal("GL_EXT_draw_buffers");
2826 }
2827 else
2828 SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
2829
2830 var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
2831 }
2832
2833 void CompilerGLSL::replace_fragment_outputs()
2834 {
2835 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2836 auto &type = this->get<SPIRType>(var.basetype);
2837
2838 if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
2839 replace_fragment_output(var);
2840 });
2841 }
2842
2843 string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
2844 {
2845 if (out_type.vecsize == input_components)
2846 return expr;
2847 else if (input_components == 1 && !backend.can_swizzle_scalar)
2848 return join(type_to_glsl(out_type), "(", expr, ")");
2849 else
2850 {
2851 // FIXME: This will not work with packed expressions.
2852 auto e = enclose_expression(expr) + ".";
2853 // Just clamp the swizzle index if we have more outputs than inputs.
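		// Example of the result (illustrative): expanding a 2-component input expression to a vec4 output
		// produces "expr.xyyy", i.e. the last available input component fills the missing lanes.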
2854 for (uint32_t c = 0; c < out_type.vecsize; c++)
2855 e += index_to_swizzle(min(c, input_components - 1));
2856 if (backend.swizzle_is_function && out_type.vecsize > 1)
2857 e += "()";
2858
2859 remove_duplicate_swizzle(e);
2860 return e;
2861 }
2862 }
2863
2864 void CompilerGLSL::emit_pls()
2865 {
2866 auto &execution = get_entry_point();
2867 if (execution.model != ExecutionModelFragment)
2868 SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
2869
2870 if (!options.es)
2871 SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
2872
2873 if (options.version < 300)
2874 SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
2875
2876 if (!pls_inputs.empty())
2877 {
2878 statement("__pixel_local_inEXT _PLSIn");
2879 begin_scope();
2880 for (auto &input : pls_inputs)
2881 statement(pls_decl(input), ";");
2882 end_scope_decl();
2883 statement("");
2884 }
2885
2886 if (!pls_outputs.empty())
2887 {
2888 statement("__pixel_local_outEXT _PLSOut");
2889 begin_scope();
2890 for (auto &output : pls_outputs)
2891 statement(pls_decl(output), ";");
2892 end_scope_decl();
2893 statement("");
2894 }
2895 }
2896
2897 void CompilerGLSL::fixup_image_load_store_access()
2898 {
2899 if (!options.enable_storage_image_qualifier_deduction)
2900 return;
2901
2902 ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
2903 auto &vartype = expression_type(var);
2904 if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
2905 {
2906 // Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
2907 // Solve this by making the image access as restricted as possible and loosen up if we need to.
2908 // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
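			// In emitted GLSL terms (illustrative), such an image initially carries both readonly and
			// writeonly qualifiers; the image load/store handling elsewhere clears the corresponding
			// flag once an actual read or write of the image is encountered.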
2909
2910 auto &flags = ir.meta[var].decoration.decoration_flags;
2911 if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
2912 {
2913 flags.set(DecorationNonWritable);
2914 flags.set(DecorationNonReadable);
2915 }
2916 }
2917 });
2918 }
2919
2920 static bool is_block_builtin(BuiltIn builtin)
2921 {
2922 return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
2923 builtin == BuiltInCullDistance;
2924 }
2925
2926 bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
2927 {
2928 // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
2929
2930 if (storage != StorageClassOutput)
2931 return false;
2932 bool should_force = false;
2933
2934 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2935 if (should_force)
2936 return;
2937
2938 auto &type = this->get<SPIRType>(var.basetype);
2939 bool block = has_decoration(type.self, DecorationBlock);
2940 if (var.storage == storage && block && is_builtin_variable(var))
2941 {
2942 uint32_t member_count = uint32_t(type.member_types.size());
2943 for (uint32_t i = 0; i < member_count; i++)
2944 {
2945 if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
2946 is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
2947 has_member_decoration(type.self, i, DecorationOffset))
2948 {
2949 should_force = true;
2950 }
2951 }
2952 }
2953 else if (var.storage == storage && !block && is_builtin_variable(var))
2954 {
2955 if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
2956 has_decoration(var.self, DecorationOffset))
2957 {
2958 should_force = true;
2959 }
2960 }
2961 });
2962
2963 // If we're declaring clip/cull planes with control points we need to force block declaration.
2964 if (get_execution_model() == ExecutionModelTessellationControl &&
2965 (clip_distance_count || cull_distance_count))
2966 {
2967 should_force = true;
2968 }
2969
2970 return should_force;
2971 }
2972
2973 void CompilerGLSL::fixup_implicit_builtin_block_names()
2974 {
2975 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2976 auto &type = this->get<SPIRType>(var.basetype);
2977 bool block = has_decoration(type.self, DecorationBlock);
2978 if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
2979 is_builtin_variable(var))
2980 {
2981 // Make sure the array has a supported name in the code.
2982 if (var.storage == StorageClassOutput)
2983 set_name(var.self, "gl_out");
2984 else if (var.storage == StorageClassInput)
2985 set_name(var.self, "gl_in");
2986 }
2987 });
2988 }
2989
2990 void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
2991 {
2992 Bitset emitted_builtins;
2993 Bitset global_builtins;
2994 const SPIRVariable *block_var = nullptr;
2995 bool emitted_block = false;
2996 bool builtin_array = false;
2997
2998 // Need to use declared size in the type.
2999 // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
3000 uint32_t cull_distance_size = 0;
3001 uint32_t clip_distance_size = 0;
3002
3003 bool have_xfb_buffer_stride = false;
3004 bool have_geom_stream = false;
3005 bool have_any_xfb_offset = false;
3006 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
3007 std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
3008
3009 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3010 auto &type = this->get<SPIRType>(var.basetype);
3011 bool block = has_decoration(type.self, DecorationBlock);
3012 Bitset builtins;
3013
3014 if (var.storage == storage && block && is_builtin_variable(var))
3015 {
3016 uint32_t index = 0;
3017 for (auto &m : ir.meta[type.self].members)
3018 {
3019 if (m.builtin)
3020 {
3021 builtins.set(m.builtin_type);
3022 if (m.builtin_type == BuiltInCullDistance)
3023 cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
3024 else if (m.builtin_type == BuiltInClipDistance)
3025 clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
3026
3027 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
3028 {
3029 have_any_xfb_offset = true;
3030 builtin_xfb_offsets[m.builtin_type] = m.offset;
3031 }
3032
3033 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
3034 {
3035 uint32_t stream = m.stream;
3036 if (have_geom_stream && geom_stream != stream)
3037 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3038 have_geom_stream = true;
3039 geom_stream = stream;
3040 }
3041 }
3042 index++;
3043 }
3044
3045 if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
3046 has_decoration(var.self, DecorationXfbStride))
3047 {
3048 uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
3049 uint32_t stride = get_decoration(var.self, DecorationXfbStride);
3050 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3051 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3052 if (have_xfb_buffer_stride && stride != xfb_stride)
3053 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3054 have_xfb_buffer_stride = true;
3055 xfb_buffer = buffer_index;
3056 xfb_stride = stride;
3057 }
3058
3059 if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
3060 {
3061 uint32_t stream = get_decoration(var.self, DecorationStream);
3062 if (have_geom_stream && geom_stream != stream)
3063 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3064 have_geom_stream = true;
3065 geom_stream = stream;
3066 }
3067 }
3068 else if (var.storage == storage && !block && is_builtin_variable(var))
3069 {
3070 // While we're at it, collect all declared global builtins (HLSL mostly ...).
3071 auto &m = ir.meta[var.self].decoration;
3072 if (m.builtin)
3073 {
3074 global_builtins.set(m.builtin_type);
3075 if (m.builtin_type == BuiltInCullDistance)
3076 cull_distance_size = to_array_size_literal(type);
3077 else if (m.builtin_type == BuiltInClipDistance)
3078 clip_distance_size = to_array_size_literal(type);
3079
3080 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
3081 m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
3082 {
3083 have_any_xfb_offset = true;
3084 builtin_xfb_offsets[m.builtin_type] = m.offset;
3085 uint32_t buffer_index = m.xfb_buffer;
3086 uint32_t stride = m.xfb_stride;
3087 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3088 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3089 if (have_xfb_buffer_stride && stride != xfb_stride)
3090 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3091 have_xfb_buffer_stride = true;
3092 xfb_buffer = buffer_index;
3093 xfb_stride = stride;
3094 }
3095
3096 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
3097 {
3098 uint32_t stream = get_decoration(var.self, DecorationStream);
3099 if (have_geom_stream && geom_stream != stream)
3100 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3101 have_geom_stream = true;
3102 geom_stream = stream;
3103 }
3104 }
3105 }
3106
3107 if (builtins.empty())
3108 return;
3109
3110 if (emitted_block)
3111 SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
3112
3113 emitted_builtins = builtins;
3114 emitted_block = true;
3115 builtin_array = !type.array.empty();
3116 block_var = &var;
3117 });
3118
3119 global_builtins =
3120 Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
3121 (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
3122
3123 // Try to collect all other declared builtins.
3124 if (!emitted_block)
3125 emitted_builtins = global_builtins;
3126
3127 // Can't declare an empty interface block.
3128 if (emitted_builtins.empty())
3129 return;
3130
3131 if (storage == StorageClassOutput)
3132 {
3133 SmallVector<string> attr;
3134 if (have_xfb_buffer_stride && have_any_xfb_offset)
3135 {
3136 if (!options.es)
3137 {
3138 if (options.version < 440 && options.version >= 140)
3139 require_extension_internal("GL_ARB_enhanced_layouts");
3140 else if (options.version < 140)
3141 SPIRV_CROSS_THROW("Explicit transform feedback (xfb_*) qualifiers are not supported in targets below GLSL 1.40.");
3144 }
3145 else if (options.es)
3146 SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
3147 attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
3148 }
3149
3150 if (have_geom_stream)
3151 {
3152 if (get_execution_model() != ExecutionModelGeometry)
3153 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
3154 if (options.es)
3155 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
3156 if (options.version < 400)
3157 require_extension_internal("GL_ARB_transform_feedback3");
3158 attr.push_back(join("stream = ", geom_stream));
3159 }
3160
3161 if (!attr.empty())
3162 statement("layout(", merge(attr), ") out gl_PerVertex");
3163 else
3164 statement("out gl_PerVertex");
3165 }
3166 else
3167 {
3168 // If we have geometry passthrough, the gl_PerVertex input block itself must also be declared passthrough.
3169 if (get_entry_point().geometry_passthrough)
3170 statement("layout(passthrough) in gl_PerVertex");
3171 else
3172 statement("in gl_PerVertex");
3173 }
3174
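// The scope emitted below closes out to a block roughly like the following (sketch only; the member
// list, in/out qualifier and instance name depend on what was collected above):
//   out gl_PerVertex
//   {
//       vec4 gl_Position;
//       float gl_ClipDistance[2];
//   } gl_out[];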
3175 begin_scope();
3176 if (emitted_builtins.get(BuiltInPosition))
3177 {
3178 auto itr = builtin_xfb_offsets.find(BuiltInPosition);
3179 if (itr != end(builtin_xfb_offsets))
3180 statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
3181 else
3182 statement("vec4 gl_Position;");
3183 }
3184
3185 if (emitted_builtins.get(BuiltInPointSize))
3186 {
3187 auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
3188 if (itr != end(builtin_xfb_offsets))
3189 statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
3190 else
3191 statement("float gl_PointSize;");
3192 }
3193
3194 if (emitted_builtins.get(BuiltInClipDistance))
3195 {
3196 auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
3197 if (itr != end(builtin_xfb_offsets))
3198 statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
3199 else
3200 statement("float gl_ClipDistance[", clip_distance_size, "];");
3201 }
3202
3203 if (emitted_builtins.get(BuiltInCullDistance))
3204 {
3205 auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
3206 if (itr != end(builtin_xfb_offsets))
3207 statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
3208 else
3209 statement("float gl_CullDistance[", cull_distance_size, "];");
3210 }
3211
3212 if (builtin_array)
3213 {
3214 if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
3215 end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
3216 else
3217 end_scope_decl(join(to_name(block_var->self), "[]"));
3218 }
3219 else
3220 end_scope_decl();
3221 statement("");
3222 }
3223
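// For reference, an OpUndef of e.g. a vec4 type simply becomes a global declaration such as
// "vec4 _42;" (or "vec4 _42 = vec4(0.0);" when force_zero_initialized_variables is set).
// The "_42" name is invented for illustration; the real identifier comes from to_name().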
3224 void CompilerGLSL::declare_undefined_values()
3225 {
3226 bool emitted = false;
3227 ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) {
3228 auto &type = this->get<SPIRType>(undef.basetype);
3229 // OpUndef can be void for some reason ...
3230 if (type.basetype == SPIRType::Void)
3231 return;
3232
3233 string initializer;
3234 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
3235 initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
3236
3237 statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
3238 emitted = true;
3239 });
3240
3241 if (emitted)
3242 statement("");
3243 }
3244
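// A variable counts as a LUT when it is remapped, statically assigned, and its static expression is a
// constant flagged as is_used_as_lut, i.e. something like "const float _lut[4] = float[](...);" which is
// only ever read through indexing. (Sketch only; the name and contents are invented.)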
3245 bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
3246 {
3247 bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
3248
3249 if (statically_assigned)
3250 {
3251 auto *constant = maybe_get<SPIRConstant>(var.static_expression);
3252 if (constant && constant->is_used_as_lut)
3253 return true;
3254 }
3255
3256 return false;
3257 }
3258
3259 void CompilerGLSL::emit_resources()
3260 {
3261 auto &execution = get_entry_point();
3262
3263 replace_illegal_names();
3264
3265 // Legacy GL uses gl_FragData[], redeclare all fragment outputs
3266 // with builtins.
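// Roughly: a modern "layout(location = 1) out vec4 color;" declaration is dropped and its writes are
// redirected to the legacy builtin instead, e.g. "gl_FragData[1] = ...;". (Sketch only; see replace_fragment_outputs.)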
3267 if (execution.model == ExecutionModelFragment && is_legacy())
3268 replace_fragment_outputs();
3269
3270 // Emit PLS blocks if we have such variables.
3271 if (!pls_inputs.empty() || !pls_outputs.empty())
3272 emit_pls();
3273
3274 switch (execution.model)
3275 {
3276 case ExecutionModelGeometry:
3277 case ExecutionModelTessellationControl:
3278 case ExecutionModelTessellationEvaluation:
3279 fixup_implicit_builtin_block_names();
3280 break;
3281
3282 default:
3283 break;
3284 }
3285
3286 // Emit custom gl_PerVertex for SSO compatibility.
3287 if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
3288 {
3289 switch (execution.model)
3290 {
3291 case ExecutionModelGeometry:
3292 case ExecutionModelTessellationControl:
3293 case ExecutionModelTessellationEvaluation:
3294 emit_declared_builtin_block(StorageClassInput, execution.model);
3295 emit_declared_builtin_block(StorageClassOutput, execution.model);
3296 break;
3297
3298 case ExecutionModelVertex:
3299 emit_declared_builtin_block(StorageClassOutput, execution.model);
3300 break;
3301
3302 default:
3303 break;
3304 }
3305 }
3306 else if (should_force_emit_builtin_block(StorageClassOutput))
3307 {
3308 emit_declared_builtin_block(StorageClassOutput, execution.model);
3309 }
3310 else if (execution.geometry_passthrough)
3311 {
3312 // Need to declare gl_in with Passthrough.
3313 // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
3314 emit_declared_builtin_block(StorageClassInput, execution.model);
3315 }
3316 else
3317 {
3318 // Need to redeclare clip/cull distance with explicit size to use them.
3319 // SPIR-V mandates these builtins have a size declared.
3320 const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
3321 if (clip_distance_count != 0)
3322 statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
3323 if (cull_distance_count != 0)
3324 statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
3325 if (clip_distance_count != 0 || cull_distance_count != 0)
3326 statement("");
3327 }
3328
3329 if (position_invariant)
3330 {
3331 statement("invariant gl_Position;");
3332 statement("");
3333 }
3334
3335 bool emitted = false;
3336
3337 // Emit specialization constants up front.
3338 // In Vulkan GLSL they become real constant_id constants; otherwise they are emitted as plain
3339 // constants backed by #define macros, and spec op expressions will redirect to the constant name.
3340 //
3341 {
3342 auto loop_lock = ir.create_loop_hard_lock();
3343 for (auto &id_ : ir.ids_for_constant_or_type)
3344 {
3345 auto &id = ir.ids[id_];
3346
3347 if (id.get_type() == TypeConstant)
3348 {
3349 auto &c = id.get<SPIRConstant>();
3350
3351 bool needs_declaration = c.specialization || c.is_used_as_lut;
3352
3353 if (needs_declaration)
3354 {
3355 if (!options.vulkan_semantics && c.specialization)
3356 {
3357 c.specialization_constant_macro_name =
3358 constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
3359 }
3360 emit_constant(c);
3361 emitted = true;
3362 }
3363 }
3364 else if (id.get_type() == TypeConstantOp)
3365 {
3366 emit_specialization_constant_op(id.get<SPIRConstantOp>());
3367 emitted = true;
3368 }
3369 else if (id.get_type() == TypeType)
3370 {
3371 auto *type = &id.get<SPIRType>();
3372
3373 bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
3374 (!has_decoration(type->self, DecorationBlock) &&
3375 !has_decoration(type->self, DecorationBufferBlock));
3376
3377 // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
3378 if (type->basetype == SPIRType::Struct && type->pointer &&
3379 has_decoration(type->self, DecorationBlock) &&
3380 (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
3381 type->storage == StorageClassHitAttributeKHR))
3382 {
3383 type = &get<SPIRType>(type->parent_type);
3384 is_natural_struct = true;
3385 }
3386
3387 if (is_natural_struct)
3388 {
3389 if (emitted)
3390 statement("");
3391 emitted = false;
3392
3393 emit_struct(*type);
3394 }
3395 }
3396 }
3397 }
3398
3399 if (emitted)
3400 statement("");
3401
3402 // If we needed to declare work group size late, check here.
3403 // If the work group size depends on a specialization constant, we need to declare the layout() block
3404 // after constants (and their macros) have been declared.
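// The declaration emitted here then ends up along the lines of
//   layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_10, local_size_y = 1, local_size_z = 1) in;
// where the macro name is only a sketch of what constant_value_macro_name() produced above.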
3405 if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
3406 execution.workgroup_size.constant != 0)
3407 {
3408 SpecializationConstant wg_x, wg_y, wg_z;
3409 get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
3410
3411 if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
3412 {
3413 SmallVector<string> inputs;
3414 build_workgroup_size(inputs, wg_x, wg_y, wg_z);
3415 statement("layout(", merge(inputs), ") in;");
3416 statement("");
3417 }
3418 }
3419
3420 emitted = false;
3421
3422 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
3423 {
3424 for (auto type : physical_storage_non_block_pointer_types)
3425 {
3426 emit_buffer_reference_block(get<SPIRType>(type), false);
3427 }
3428
3429 // Output buffer reference blocks.
3430 // Do this in two stages, one with forward declaration,
3431 // and one without. Buffer reference blocks can reference themselves
3432 // to support things like linked lists.
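// Sketch of the two-stage output for a self-referencing type (names invented):
//   layout(buffer_reference) buffer Node;                                  // first pass: forward declaration
//   layout(buffer_reference, std430) buffer Node { Node next; float v; };  // second pass: full definition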
3433 ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
3434 bool has_block_flags = has_decoration(type.self, DecorationBlock);
3435 if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3436 type.storage == StorageClassPhysicalStorageBufferEXT)
3437 {
3438 emit_buffer_reference_block(type, true);
3439 }
3440 });
3441
3442 ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
3443 bool has_block_flags = has_decoration(type.self, DecorationBlock);
3444 if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3445 type.storage == StorageClassPhysicalStorageBufferEXT)
3446 {
3447 emit_buffer_reference_block(type, false);
3448 }
3449 });
3450 }
3451
3452 // Output UBOs and SSBOs
3453 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3454 auto &type = this->get<SPIRType>(var.basetype);
3455
3456 bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
3457 type.storage == StorageClassShaderRecordBufferKHR;
3458 bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
3459 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
3460
3461 if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
3462 has_block_flags)
3463 {
3464 emit_buffer_block(var);
3465 }
3466 });
3467
3468 // Output push constant blocks
3469 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3470 auto &type = this->get<SPIRType>(var.basetype);
3471 if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
3472 !is_hidden_variable(var))
3473 {
3474 emit_push_constant_block(var);
3475 }
3476 });
3477
3478 bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
3479
3480 // Output Uniform Constants (values, samplers, images, etc).
3481 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3482 auto &type = this->get<SPIRType>(var.basetype);
3483
3484 // If we're remapping separate samplers and images, only emit the combined samplers.
3485 if (skip_separate_image_sampler)
3486 {
3487 // Sampler buffers are always used without a sampler, and they will also work in regular GL.
3488 bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
3489 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3490 bool separate_sampler = type.basetype == SPIRType::Sampler;
3491 if (!sampler_buffer && (separate_image || separate_sampler))
3492 return;
3493 }
3494
3495 if (var.storage != StorageClassFunction && type.pointer &&
3496 (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
3497 type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
3498 type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
3499 type.storage == StorageClassHitAttributeKHR) &&
3500 !is_hidden_variable(var))
3501 {
3502 emit_uniform(var);
3503 emitted = true;
3504 }
3505 });
3506
3507 if (emitted)
3508 statement("");
3509 emitted = false;
3510
3511 bool emitted_base_instance = false;
3512
3513 // Output in/out interfaces.
3514 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3515 auto &type = this->get<SPIRType>(var.basetype);
3516
3517 bool is_hidden = is_hidden_variable(var);
3518
3519 // Unused output I/O variables might still be required to implement framebuffer fetch.
3520 if (var.storage == StorageClassOutput && !is_legacy() &&
3521 location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0)
3522 {
3523 is_hidden = false;
3524 }
3525
3526 if (var.storage != StorageClassFunction && type.pointer &&
3527 (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
3528 interface_variable_exists_in_entry_point(var.self) && !is_hidden)
3529 {
3530 emit_interface_block(var);
3531 emitted = true;
3532 }
3533 else if (is_builtin_variable(var))
3534 {
3535 auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
3536 // For gl_InstanceIndex emulation on GLES, the API user needs to
3537 // supply this uniform.
3538
3539 // The draw parameter extension is soft-enabled on GL with some fallbacks.
3540 if (!options.vulkan_semantics)
3541 {
3542 if (!emitted_base_instance &&
3543 ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
3544 (builtin == BuiltInBaseInstance)))
3545 {
3546 statement("#ifdef GL_ARB_shader_draw_parameters");
3547 statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
3548 statement("#else");
3549 // A crude but simple workaround which should be good enough for non-indirect draws.
3550 statement("uniform int SPIRV_Cross_BaseInstance;");
3551 statement("#endif");
3552 emitted = true;
3553 emitted_base_instance = true;
3554 }
3555 else if (builtin == BuiltInBaseVertex)
3556 {
3557 statement("#ifdef GL_ARB_shader_draw_parameters");
3558 statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
3559 statement("#else");
3560 // A crude but simple workaround which should be good enough for non-indirect draws.
3561 statement("uniform int SPIRV_Cross_BaseVertex;");
3562 statement("#endif");
3563 }
3564 else if (builtin == BuiltInDrawIndex)
3565 {
3566 statement("#ifndef GL_ARB_shader_draw_parameters");
3567 // Cannot really be worked around.
3568 statement("#error GL_ARB_shader_draw_parameters is not supported.");
3569 statement("#endif");
3570 }
3571 }
3572 }
3573 });
3574
3575 // Global variables.
3576 for (auto global : global_variables)
3577 {
3578 auto &var = get<SPIRVariable>(global);
3579 if (is_hidden_variable(var, true))
3580 continue;
3581
3582 if (var.storage != StorageClassOutput)
3583 {
3584 if (!variable_is_lut(var))
3585 {
3586 add_resource_name(var.self);
3587
3588 string initializer;
3589 if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
3590 !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
3591 {
3592 initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
3593 }
3594
3595 statement(variable_decl(var), initializer, ";");
3596 emitted = true;
3597 }
3598 }
3599 else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
3600 {
3601 emit_output_variable_initializer(var);
3602 }
3603 }
3604
3605 if (emitted)
3606 statement("");
3607
3608 declare_undefined_values();
3609 }
3610
3611 void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
3612 {
3613 // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
3614 auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
3615 auto &type = get<SPIRType>(var.basetype);
3616 bool is_patch = has_decoration(var.self, DecorationPatch);
3617 bool is_block = has_decoration(type.self, DecorationBlock);
3618 bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
3619
3620 if (is_block)
3621 {
3622 uint32_t member_count = uint32_t(type.member_types.size());
3623 bool type_is_array = type.array.size() == 1;
3624 uint32_t array_size = 1;
3625 if (type_is_array)
3626 array_size = to_array_size_literal(type);
3627 uint32_t iteration_count = is_control_point ? 1 : array_size;
3628
3629 // If the initializer is a block, we must initialize each block member one at a time.
3630 for (uint32_t i = 0; i < member_count; i++)
3631 {
3632 // These outputs might not have been properly declared, so don't initialize them in that case.
3633 if (has_member_decoration(type.self, i, DecorationBuiltIn))
3634 {
3635 if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
3636 !cull_distance_count)
3637 continue;
3638
3639 if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
3640 !clip_distance_count)
3641 continue;
3642 }
3643
3644 // We need to build a per-member array first, essentially transposing from AoS to SoA.
3645 // This code path hits when we have an array of blocks.
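// For example, member 0 (a vec4) of an output block array of size 2 might first get a helper like
//   const vec4 _7_0_init[2] = vec4[](<initializer for element 0>, <initializer for element 1>);
// which is then copied member-by-member in main(). (Sketch only; "_7" stands in for the variable ID.)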
3646 string lut_name;
3647 if (type_is_array)
3648 {
3649 lut_name = join("_", var.self, "_", i, "_init");
3650 uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
3651 auto &member_type = get<SPIRType>(member_type_id);
3652 auto array_type = member_type;
3653 array_type.parent_type = member_type_id;
3654 array_type.array.push_back(array_size);
3655 array_type.array_size_literal.push_back(true);
3656
3657 SmallVector<string> exprs;
3658 exprs.reserve(array_size);
3659 auto &c = get<SPIRConstant>(var.initializer);
3660 for (uint32_t j = 0; j < array_size; j++)
3661 exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
3662 statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ",
3663 type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
3664 }
3665
3666 for (uint32_t j = 0; j < iteration_count; j++)
3667 {
3668 entry_func.fixup_hooks_in.push_back([=, &var]() {
3669 AccessChainMeta meta;
3670 auto &c = this->get<SPIRConstant>(var.initializer);
3671
3672 uint32_t invocation_id = 0;
3673 uint32_t member_index_id = 0;
3674 if (is_control_point)
3675 {
3676 uint32_t ids = ir.increase_bound_by(3);
3677 SPIRType uint_type;
3678 uint_type.basetype = SPIRType::UInt;
3679 uint_type.width = 32;
3680 set<SPIRType>(ids, uint_type);
3681 set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
3682 set<SPIRConstant>(ids + 2, ids, i, false);
3683 invocation_id = ids + 1;
3684 member_index_id = ids + 2;
3685 }
3686
3687 if (is_patch)
3688 {
3689 statement("if (gl_InvocationID == 0)");
3690 begin_scope();
3691 }
3692
3693 if (type_is_array && !is_control_point)
3694 {
3695 uint32_t indices[2] = { j, i };
3696 auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3697 statement(chain, " = ", lut_name, "[", j, "];");
3698 }
3699 else if (is_control_point)
3700 {
3701 uint32_t indices[2] = { invocation_id, member_index_id };
3702 auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
3703 statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
3704 }
3705 else
3706 {
3707 auto chain =
3708 access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3709 statement(chain, " = ", to_expression(c.subconstants[i]), ";");
3710 }
3711
3712 if (is_patch)
3713 end_scope();
3714 });
3715 }
3716 }
3717 }
3718 else if (is_control_point)
3719 {
3720 auto lut_name = join("_", var.self, "_init");
3721 statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type),
3722 " = ", to_expression(var.initializer), ";");
3723 entry_func.fixup_hooks_in.push_back([&, lut_name]() {
3724 statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
3725 });
3726 }
3727 else if (has_decoration(var.self, DecorationBuiltIn) &&
3728 BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask)
3729 {
3730 // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_<
3731 entry_func.fixup_hooks_in.push_back([&] {
3732 auto &c = this->get<SPIRConstant>(var.initializer);
3733 uint32_t num_constants = uint32_t(c.subconstants.size());
3734 for (uint32_t i = 0; i < num_constants; i++)
3735 {
3736 // Don't use to_expression on constant since it might be uint, just fish out the raw int.
3737 statement(to_expression(var.self), "[", i, "] = ",
3738 convert_to_string(this->get<SPIRConstant>(c.subconstants[i]).scalar_i32()), ";");
3739 }
3740 });
3741 }
3742 else
3743 {
3744 auto lut_name = join("_", var.self, "_init");
3745 statement("const ", type_to_glsl(type), " ", lut_name,
3746 type_to_array_glsl(type), " = ", to_expression(var.initializer), ";");
3747 entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
3748 if (is_patch)
3749 {
3750 statement("if (gl_InvocationID == 0)");
3751 begin_scope();
3752 }
3753 statement(to_expression(var.self), " = ", lut_name, ";");
3754 if (is_patch)
3755 end_scope();
3756 });
3757 }
3758 }
3759
3760 void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
3761 {
3762 static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
3763 "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
3764
3765 if (!options.vulkan_semantics)
3766 {
3767 using Supp = ShaderSubgroupSupportHelper;
3768 auto result = shader_subgroup_supporter.resolve();
3769
3770 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
3771 {
3772 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
3773
3774 for (auto &e : exts)
3775 {
3776 const char *name = Supp::get_extension_name(e);
3777 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3778
3779 switch (e)
3780 {
3781 case Supp::NV_shader_thread_group:
3782 statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
3783 statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
3784 statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
3785 statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
3786 statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
3787 break;
3788 case Supp::ARB_shader_ballot:
3789 statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
3790 statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
3791 statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
3792 statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
3793 statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
3794 break;
3795 default:
3796 break;
3797 }
3798 }
3799 statement("#endif");
3800 statement("");
3801 }
3802
3803 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
3804 {
3805 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
3806
3807 for (auto &e : exts)
3808 {
3809 const char *name = Supp::get_extension_name(e);
3810 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3811
3812 switch (e)
3813 {
3814 case Supp::NV_shader_thread_group:
3815 statement("#define gl_SubgroupSize gl_WarpSizeNV");
3816 break;
3817 case Supp::ARB_shader_ballot:
3818 statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
3819 break;
3820 case Supp::AMD_gcn_shader:
3821 statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
3822 break;
3823 default:
3824 break;
3825 }
3826 }
3827 statement("#endif");
3828 statement("");
3829 }
3830
3831 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
3832 {
3833 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
3834
3835 for (auto &e : exts)
3836 {
3837 const char *name = Supp::get_extension_name(e);
3838 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3839
3840 switch (e)
3841 {
3842 case Supp::NV_shader_thread_group:
3843 statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
3844 break;
3845 case Supp::ARB_shader_ballot:
3846 statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
3847 break;
3848 default:
3849 break;
3850 }
3851 }
3852 statement("#endif");
3853 statement("");
3854 }
3855
3856 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
3857 {
3858 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
3859
3860 for (auto &e : exts)
3861 {
3862 const char *name = Supp::get_extension_name(e);
3863 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3864
3865 switch (e)
3866 {
3867 case Supp::NV_shader_thread_group:
3868 statement("#define gl_SubgroupID gl_WarpIDNV");
3869 break;
3870 default:
3871 break;
3872 }
3873 }
3874 statement("#endif");
3875 statement("");
3876 }
3877
3878 if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
3879 {
3880 auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
3881
3882 for (auto &e : exts)
3883 {
3884 const char *name = Supp::get_extension_name(e);
3885 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3886
3887 switch (e)
3888 {
3889 case Supp::NV_shader_thread_group:
3890 statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
3891 break;
3892 default:
3893 break;
3894 }
3895 }
3896 statement("#endif");
3897 statement("");
3898 }
3899
3900 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
3901 {
3902 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
3903
3904 for (auto &e : exts)
3905 {
3906 const char *name = Supp::get_extension_name(e);
3907 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3908
3909 switch (e)
3910 {
3911 case Supp::NV_shader_thread_shuffle:
3912 for (const char *t : workaround_types)
3913 {
3914 statement(t, " subgroupBroadcastFirst(", t,
3915 " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
3916 }
3917 for (const char *t : workaround_types)
3918 {
3919 statement(t, " subgroupBroadcast(", t,
3920 " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
3921 }
3922 break;
3923 case Supp::ARB_shader_ballot:
3924 for (const char *t : workaround_types)
3925 {
3926 statement(t, " subgroupBroadcastFirst(", t,
3927 " value) { return readFirstInvocationARB(value); }");
3928 }
3929 for (const char *t : workaround_types)
3930 {
3931 statement(t, " subgroupBroadcast(", t,
3932 " value, uint id) { return readInvocationARB(value, id); }");
3933 }
3934 break;
3935 default:
3936 break;
3937 }
3938 }
3939 statement("#endif");
3940 statement("");
3941 }
3942
3943 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
3944 {
3945 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
3946
3947 for (auto &e : exts)
3948 {
3949 const char *name = Supp::get_extension_name(e);
3950 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3951
3952 switch (e)
3953 {
3954 case Supp::NV_shader_thread_group:
3955 statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
3956 statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
3957 break;
3958 default:
3959 break;
3960 }
3961 }
3962 statement("#else");
3963 statement("uint subgroupBallotFindLSB(uvec4 value)");
3964 begin_scope();
3965 statement("int firstLive = findLSB(value.x);");
3966 statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
3967 end_scope();
3968 statement("uint subgroupBallotFindMSB(uvec4 value)");
3969 begin_scope();
3970 statement("int firstLive = findMSB(value.y);");
3971 statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
3972 end_scope();
3973 statement("#endif");
3974 statement("");
3975 }
3976
3977 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
3978 {
3979 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
3980
3981 for (auto &e : exts)
3982 {
3983 const char *name = Supp::get_extension_name(e);
3984 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3985
3986 switch (e)
3987 {
3988 case Supp::NV_gpu_shader_5:
3989 statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
3990 statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
3991 statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
3992 break;
3993 case Supp::ARB_shader_group_vote:
3994 statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
3995 statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
3996 statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
3997 break;
3998 case Supp::AMD_gcn_shader:
3999 statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
4000 statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
4001 statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
4002 "b == ballotAMD(true); }");
4003 break;
4004 default:
4005 break;
4006 }
4007 }
4008 statement("#endif");
4009 statement("");
4010 }
4011
4012 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
4013 {
4014 statement("#ifndef GL_KHR_shader_subgroup_vote");
4015 statement(
4016 "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
4017 "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
4018 for (const char *t : workaround_types)
4019 statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
4020 statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
4021 statement("#endif");
4022 statement("");
4023 }
4024
4025 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
4026 {
4027 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
4028
4029 for (auto &e : exts)
4030 {
4031 const char *name = Supp::get_extension_name(e);
4032 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
4033
4034 switch (e)
4035 {
4036 case Supp::NV_shader_thread_group:
4037 statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
4038 break;
4039 case Supp::ARB_shader_ballot:
4040 statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
4041 break;
4042 default:
4043 break;
4044 }
4045 }
4046 statement("#endif");
4047 statement("");
4048 }
4049
4050 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
4051 {
4052 statement("#ifndef GL_KHR_shader_subgroup_basic");
4053 statement("bool subgroupElect()");
4054 begin_scope();
4055 statement("uvec4 activeMask = subgroupBallot(true);");
4056 statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
4057 statement("return gl_SubgroupInvocationID == firstLive;");
4058 end_scope();
4059 statement("#endif");
4060 statement("");
4061 }
4062
4063 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
4064 {
4065 // Extensions we're using in place of GL_KHR_shader_subgroup_basic state
4066 // that subgroups execute in lockstep, so this barrier is implicit.
4067 // However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
4068 // and a specific test of optimizing scans by leveraging lock-step invocation execution
4069 // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
4070 // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
4071 statement("#ifndef GL_KHR_shader_subgroup_basic");
4072 statement("void subgroupBarrier() { memoryBarrierShared(); }");
4073 statement("#endif");
4074 statement("");
4075 }
4076
4077 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
4078 {
4079 if (model == spv::ExecutionModelGLCompute)
4080 {
4081 statement("#ifndef GL_KHR_shader_subgroup_basic");
4082 statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
4083 statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
4084 statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
4085 statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
4086 statement("#endif");
4087 }
4088 else
4089 {
4090 statement("#ifndef GL_KHR_shader_subgroup_basic");
4091 statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
4092 statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
4093 statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
4094 statement("#endif");
4095 }
4096 statement("");
4097 }
4098
4099 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
4100 {
4101 statement("#ifndef GL_KHR_shader_subgroup_ballot");
4102 statement("bool subgroupInverseBallot(uvec4 value)");
4103 begin_scope();
4104 statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
4105 end_scope();
4106
4107 statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
4108 begin_scope();
4109 statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
4110 statement("ivec2 c = bitCount(v);");
4111 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4112 statement("return uint(c.x);");
4113 statement_no_indent("#else");
4114 statement("return uint(c.x + c.y);");
4115 statement_no_indent("#endif");
4116 end_scope();
4117
4118 statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
4119 begin_scope();
4120 statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
4121 statement("ivec2 c = bitCount(v);");
4122 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4123 statement("return uint(c.x);");
4124 statement_no_indent("#else");
4125 statement("return uint(c.x + c.y);");
4126 statement_no_indent("#endif");
4127 end_scope();
4128 statement("#endif");
4129 statement("");
4130 }
4131
4132 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
4133 {
4134 statement("#ifndef GL_KHR_shader_subgroup_ballot");
4135 statement("uint subgroupBallotBitCount(uvec4 value)");
4136 begin_scope();
4137 statement("ivec2 c = bitCount(value.xy);");
4138 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4139 statement("return uint(c.x);");
4140 statement_no_indent("#else");
4141 statement("return uint(c.x + c.y);");
4142 statement_no_indent("#endif");
4143 end_scope();
4144 statement("#endif");
4145 statement("");
4146 }
4147
4148 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
4149 {
4150 statement("#ifndef GL_KHR_shader_subgroup_ballot");
4151 statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
4152 begin_scope();
4153 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4154 statement("uint shifted = value.x >> index;");
4155 statement_no_indent("#else");
4156 statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
4157 statement_no_indent("#endif");
4158 statement("return (shifted & 1u) != 0u;");
4159 end_scope();
4160 statement("#endif");
4161 statement("");
4162 }
4163 }
4164
4165 if (!workaround_ubo_load_overload_types.empty())
4166 {
4167 for (auto &type_id : workaround_ubo_load_overload_types)
4168 {
4169 auto &type = get<SPIRType>(type_id);
4170 statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type),
4171 " wrap) { return wrap; }");
4172 }
4173 statement("");
4174 }
4175
4176 if (requires_transpose_2x2)
4177 {
4178 statement("mat2 spvTranspose(mat2 m)");
4179 begin_scope();
4180 statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
4181 end_scope();
4182 statement("");
4183 }
4184
4185 if (requires_transpose_3x3)
4186 {
4187 statement("mat3 spvTranspose(mat3 m)");
4188 begin_scope();
4189 statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
4190 end_scope();
4191 statement("");
4192 }
4193
4194 if (requires_transpose_4x4)
4195 {
4196 statement("mat4 spvTranspose(mat4 m)");
4197 begin_scope();
4198 statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
4199 "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
4200 end_scope();
4201 statement("");
4202 }
4203 }
4204
4205 // Returns a string representation of the ID, usable as a function arg.
4206 // Default is to simply return the expression representation of the arg ID.
4207 // Subclasses may override to modify the return value.
4208 string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
4209 {
4210 // Make sure that we use the name of the original variable, and not the parameter alias.
4211 uint32_t name_id = id;
4212 auto *var = maybe_get<SPIRVariable>(id);
4213 if (var && var->basevariable)
4214 name_id = var->basevariable;
4215 return to_expression(name_id);
4216 }
4217
4218 void CompilerGLSL::handle_invalid_expression(uint32_t id)
4219 {
4220 // We tried to read an invalidated expression.
4221 // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
4222 forced_temporaries.insert(id);
4223 force_recompile();
4224 }
4225
4226 // Converts the format of the current expression from packed to unpacked
4227 // by wrapping the expression in a constructor of the appropriate type.
4228 // GLSL does not support packed formats, so simply return the expression.
4229 // Subclasses that do support packed formats will override this.
4230 string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
4231 {
4232 return expr_str;
4233 }
4234
4235 // Sometimes we proactively enclose an expression, and it turns out we did not need it after all.
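// For example, "(a + b)" can be stripped down to "a + b", but "(a + b) * (c + d)" must be left alone
// since its outer parentheses do not enclose the entire expression.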
4236 void CompilerGLSL::strip_enclosed_expression(string &expr)
4237 {
4238 if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
4239 return;
4240
4241 // Have to make sure that our first and last parens actually enclose everything inside it.
4242 uint32_t paren_count = 0;
4243 for (auto &c : expr)
4244 {
4245 if (c == '(')
4246 paren_count++;
4247 else if (c == ')')
4248 {
4249 paren_count--;
4250
4251 // If we hit 0 and this is not the final char, our first and final parens actually don't
4252 // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
4253 if (paren_count == 0 && &c != &expr.back())
4254 return;
4255 }
4256 }
4257 expr.erase(expr.size() - 1, 1);
4258 expr.erase(begin(expr));
4259 }
4260
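// For example, "a + b" and "-x" become "(a + b)" and "(-x)", while "texture(s, uv)" or "v.xy" are
// returned unchanged since they already parse as a single term.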
4261 string CompilerGLSL::enclose_expression(const string &expr)
4262 {
4263 bool need_parens = false;
4264
4265 // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
4266 // unary expressions.
4267 if (!expr.empty())
4268 {
4269 auto c = expr.front();
4270 if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
4271 need_parens = true;
4272 }
4273
4274 if (!need_parens)
4275 {
4276 uint32_t paren_count = 0;
4277 for (auto c : expr)
4278 {
4279 if (c == '(' || c == '[')
4280 paren_count++;
4281 else if (c == ')' || c == ']')
4282 {
4283 assert(paren_count);
4284 paren_count--;
4285 }
4286 else if (c == ' ' && paren_count == 0)
4287 {
4288 need_parens = true;
4289 break;
4290 }
4291 }
4292 assert(paren_count == 0);
4293 }
4294
4295 // If this expression contains any spaces which are not enclosed by parentheses,
4296 // we need to enclose it so we can treat the whole string as an expression.
4297 // This happens when two expressions have been part of a binary op earlier.
4298 if (need_parens)
4299 return join('(', expr, ')');
4300 else
4301 return expr;
4302 }
4303
4304 string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
4305 {
4306 // If this expression starts with an address-of operator ('&'), then
4307 // just return the part after the operator.
4308 // TODO: Strip parens if unnecessary?
4309 if (expr.front() == '&')
4310 return expr.substr(1);
4311 else if (backend.native_pointers)
4312 return join('*', expr);
4313 else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
4314 expr_type.pointer_depth == 1)
4315 {
4316 return join(enclose_expression(expr), ".value");
4317 }
4318 else
4319 return expr;
4320 }
4321
4322 string CompilerGLSL::address_of_expression(const std::string &expr)
4323 {
4324 if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
4325 {
4326 // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
4327 // the first two and last characters. We might have to enclose the expression.
4328 // This doesn't work for cases like (*foo + 10),
4329 // but this is an r-value expression which we cannot take the address of anyways.
4330 return enclose_expression(expr.substr(2, expr.size() - 3));
4331 }
4332 else if (expr.front() == '*')
4333 {
4334 // If this expression starts with a dereference operator ('*'), then
4335 // just return the part after the operator.
4336 return expr.substr(1);
4337 }
4338 else
4339 return join('&', enclose_expression(expr));
4340 }
4341
4342 // Just like to_expression except that we enclose the expression inside parentheses if needed.
4343 string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
4344 {
4345 return enclose_expression(to_expression(id, register_expression_read));
4346 }
4347
4348 // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
4349 // need_transpose must be forced to false.
4350 string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
4351 {
4352 return unpack_expression_type(to_expression(id), expression_type(id),
4353 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4354 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
4355 }
4356
4357 string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
4358 {
4359 // If we need to transpose, it will also take care of unpacking rules.
4360 auto *e = maybe_get<SPIRExpression>(id);
4361 bool need_transpose = e && e->need_transpose;
4362 bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4363 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4364
4365 if (!need_transpose && (is_remapped || is_packed))
4366 {
4367 return unpack_expression_type(to_expression(id, register_expression_read),
4368 get_pointee_type(expression_type_id(id)),
4369 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4370 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
4371 }
4372 else
4373 return to_expression(id, register_expression_read);
4374 }
4375
4376 string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
4377 {
4378 // If we need to transpose, it will also take care of unpacking rules.
4379 auto *e = maybe_get<SPIRExpression>(id);
4380 bool need_transpose = e && e->need_transpose;
4381 bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4382 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4383 if (!need_transpose && (is_remapped || is_packed))
4384 {
4385 return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
4386 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4387 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
4388 }
4389 else
4390 return to_enclosed_expression(id, register_expression_read);
4391 }
4392
4393 string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
4394 {
4395 auto &type = expression_type(id);
4396 if (type.pointer && should_dereference(id))
4397 return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
4398 else
4399 return to_expression(id, register_expression_read);
4400 }
4401
4402 string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
4403 {
4404 auto &type = expression_type(id);
4405 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4406 return address_of_expression(to_enclosed_expression(id, register_expression_read));
4407 else
4408 return to_unpacked_expression(id, register_expression_read);
4409 }
4410
4411 string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
4412 {
4413 auto &type = expression_type(id);
4414 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4415 return address_of_expression(to_enclosed_expression(id, register_expression_read));
4416 else
4417 return to_enclosed_unpacked_expression(id, register_expression_read);
4418 }
4419
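// For example, index 2 normally yields "expr.z", but becomes "expr[2]" when the expression refers to a
// packed physical type where swizzles are not available.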
4420 string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
4421 {
4422 auto expr = to_enclosed_expression(id);
4423 if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
4424 return join(expr, "[", index, "]");
4425 else
4426 return join(expr, ".", index_to_swizzle(index));
4427 }
4428
4429 string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
4430 const uint32_t *chain, uint32_t length)
4431 {
4432 // It is kinda silly if the application actually enters this path, since it knows the constant up front.
4433 // It is useful here to extract the plain constant directly.
4434 SPIRConstant tmp;
4435 tmp.constant_type = result_type;
4436 auto &composite_type = get<SPIRType>(c.constant_type);
4437 assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
4438 assert(!c.specialization);
4439
4440 if (is_matrix(composite_type))
4441 {
4442 if (length == 2)
4443 {
4444 tmp.m.c[0].vecsize = 1;
4445 tmp.m.columns = 1;
4446 tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
4447 }
4448 else
4449 {
4450 assert(length == 1);
4451 tmp.m.c[0].vecsize = composite_type.vecsize;
4452 tmp.m.columns = 1;
4453 tmp.m.c[0] = c.m.c[chain[0]];
4454 }
4455 }
4456 else
4457 {
4458 assert(length == 1);
4459 tmp.m.c[0].vecsize = 1;
4460 tmp.m.columns = 1;
4461 tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
4462 }
4463
4464 return constant_expression(tmp);
4465 }
4466
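// For a 2-element array expression "arr" this produces "{ arr[0], arr[1] }"; multidimensional arrays
// recurse one dimension at a time so every element ends up spelled out explicitly.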
4467 string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
4468 {
4469 uint32_t size = to_array_size_literal(type);
4470 auto &parent = get<SPIRType>(type.parent_type);
4471 string expr = "{ ";
4472
4473 for (uint32_t i = 0; i < size; i++)
4474 {
4475 auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
4476 if (parent.array.empty())
4477 expr += subexpr;
4478 else
4479 expr += to_rerolled_array_expression(subexpr, parent);
4480
4481 if (i + 1 < size)
4482 expr += ", ";
4483 }
4484
4485 expr += " }";
4486 return expr;
4487 }
4488
4489 string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset)
4490 {
4491 auto &type = expression_type(id);
4492
4493 bool reroll_array = !type.array.empty() && (!backend.array_is_value_type ||
4494 (uses_buffer_offset && !backend.buffer_offset_array_is_value_type));
4495
4496 if (reroll_array)
4497 {
4498 // For this case, we need to "re-roll" an array initializer from a temporary.
4499 // We cannot simply pass the array directly, since it decays to a pointer and it cannot
4500 // participate in a struct initializer. E.g.
4501 // float arr[2] = { 1.0, 2.0 };
4502 // Foo foo = { arr }; must be transformed to
4503 // Foo foo = { { arr[0], arr[1] } };
4504 // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
4505
4506 // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
4507 // as temporaries anyways.
4508 return to_rerolled_array_expression(to_enclosed_expression(id), type);
4509 }
4510 else
4511 return to_unpacked_expression(id);
4512 }
4513
4514 string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
4515 {
4516 string expr = to_expression(id);
4517
4518 if (has_decoration(id, DecorationNonUniform))
4519 convert_non_uniform_expression(expr, id);
4520
4521 return expr;
4522 }
4523
4524 string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
4525 {
4526 auto itr = invalid_expressions.find(id);
4527 if (itr != end(invalid_expressions))
4528 handle_invalid_expression(id);
4529
4530 if (ir.ids[id].get_type() == TypeExpression)
4531 {
4532 // We might have a more complex chain of dependencies.
4533 // A possible scenario is that we
4534 //
4535 // %1 = OpLoad
4536 // %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
4537 // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
4538 // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
4539 // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
4540 //
4541 // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
4542 // and see that we should not forward reads of the original variable.
4543 auto &expr = get<SPIRExpression>(id);
4544 for (uint32_t dep : expr.expression_dependencies)
4545 if (invalid_expressions.find(dep) != end(invalid_expressions))
4546 handle_invalid_expression(dep);
4547 }
4548
4549 if (register_expression_read)
4550 track_expression_read(id);
4551
4552 switch (ir.ids[id].get_type())
4553 {
4554 case TypeExpression:
4555 {
4556 auto &e = get<SPIRExpression>(id);
4557 if (e.base_expression)
4558 return to_enclosed_expression(e.base_expression) + e.expression;
4559 else if (e.need_transpose)
4560 {
4561 // This should not be reached for access chains, since we always deal explicitly with transpose state
4562 // when consuming an access chain expression.
4563 uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4564 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4565 return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
4566 is_packed);
4567 }
4568 else if (flattened_structs.count(id))
4569 {
4570 return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
4571 }
4572 else
4573 {
4574 if (is_forcing_recompilation())
4575 {
4576 // During first compilation phase, certain expression patterns can trigger exponential growth of memory.
4577 // Avoid this by returning dummy expressions during this phase.
4578 // Do not use empty expressions here, because those are sentinels for other cases.
4579 return "_";
4580 }
4581 else
4582 return e.expression;
4583 }
4584 }
4585
4586 case TypeConstant:
4587 {
4588 auto &c = get<SPIRConstant>(id);
4589 auto &type = get<SPIRType>(c.constant_type);
4590
4591 // WorkGroupSize may be a constant.
4592 auto &dec = ir.meta[c.self].decoration;
4593 if (dec.builtin)
4594 return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
4595 else if (c.specialization)
4596 return to_name(id);
4597 else if (c.is_used_as_lut)
4598 return to_name(id);
4599 else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
4600 return to_name(id);
4601 else if (!type.array.empty() && !backend.can_declare_arrays_inline)
4602 return to_name(id);
4603 else
4604 return constant_expression(c);
4605 }
4606
4607 case TypeConstantOp:
4608 return to_name(id);
4609
4610 case TypeVariable:
4611 {
4612 auto &var = get<SPIRVariable>(id);
4613 		// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
4614 		// as the variable has not been declared yet.
4615 if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
4616 return to_expression(var.static_expression);
4617 else if (var.deferred_declaration)
4618 {
4619 var.deferred_declaration = false;
4620 return variable_decl(var);
4621 }
4622 else if (flattened_structs.count(id))
4623 {
4624 return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
4625 }
4626 else
4627 {
4628 auto &dec = ir.meta[var.self].decoration;
4629 if (dec.builtin)
4630 return builtin_to_glsl(dec.builtin_type, var.storage);
4631 else
4632 return to_name(id);
4633 }
4634 }
4635
4636 case TypeCombinedImageSampler:
4637 	// We should never take the expression of this type directly.
4638 // The intention is that texture sampling functions will extract the image and samplers
4639 // separately and take their expressions as needed.
4640 // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
4641 	// expression a la sampler2D(texture, sampler).
4642 SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
4643
4644 case TypeAccessChain:
4645 // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
4646 SPIRV_CROSS_THROW("Access chains have no default expression representation.");
4647
4648 default:
4649 return to_name(id);
4650 }
4651 }
4652
4653 string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
4654 {
4655 auto &type = get<SPIRType>(cop.basetype);
4656 bool binary = false;
4657 bool unary = false;
4658 string op;
4659
4660 if (is_legacy() && is_unsigned_opcode(cop.opcode))
4661 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
4662
4663 // TODO: Find a clean way to reuse emit_instruction.
4664 switch (cop.opcode)
4665 {
4666 case OpSConvert:
4667 case OpUConvert:
4668 case OpFConvert:
4669 op = type_to_glsl_constructor(type);
4670 break;
4671
4672 #define GLSL_BOP(opname, x) \
4673 case Op##opname: \
4674 binary = true; \
4675 op = x; \
4676 break
4677
4678 #define GLSL_UOP(opname, x) \
4679 case Op##opname: \
4680 unary = true; \
4681 op = x; \
4682 break
4683
4684 GLSL_UOP(SNegate, "-");
4685 GLSL_UOP(Not, "~");
4686 GLSL_BOP(IAdd, "+");
4687 GLSL_BOP(ISub, "-");
4688 GLSL_BOP(IMul, "*");
4689 GLSL_BOP(SDiv, "/");
4690 GLSL_BOP(UDiv, "/");
4691 GLSL_BOP(UMod, "%");
4692 GLSL_BOP(SMod, "%");
4693 GLSL_BOP(ShiftRightLogical, ">>");
4694 GLSL_BOP(ShiftRightArithmetic, ">>");
4695 GLSL_BOP(ShiftLeftLogical, "<<");
4696 GLSL_BOP(BitwiseOr, "|");
4697 GLSL_BOP(BitwiseXor, "^");
4698 GLSL_BOP(BitwiseAnd, "&");
4699 GLSL_BOP(LogicalOr, "||");
4700 GLSL_BOP(LogicalAnd, "&&");
4701 GLSL_UOP(LogicalNot, "!");
4702 GLSL_BOP(LogicalEqual, "==");
4703 GLSL_BOP(LogicalNotEqual, "!=");
4704 GLSL_BOP(IEqual, "==");
4705 GLSL_BOP(INotEqual, "!=");
4706 GLSL_BOP(ULessThan, "<");
4707 GLSL_BOP(SLessThan, "<");
4708 GLSL_BOP(ULessThanEqual, "<=");
4709 GLSL_BOP(SLessThanEqual, "<=");
4710 GLSL_BOP(UGreaterThan, ">");
4711 GLSL_BOP(SGreaterThan, ">");
4712 GLSL_BOP(UGreaterThanEqual, ">=");
4713 GLSL_BOP(SGreaterThanEqual, ">=");
4714
4715 case OpSelect:
4716 {
4717 if (cop.arguments.size() < 3)
4718 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4719
4720 // This one is pretty annoying. It's triggered from
4721 // uint(bool), int(bool) from spec constants.
4722 // In order to preserve its compile-time constness in Vulkan GLSL,
4723 // we need to reduce the OpSelect expression back to this simplified model.
4724 		// If we cannot, fall back to a ternary expression below.
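		// Illustrative sketch (hypothetical IDs, not taken from any particular module):
		//   %c = OpSpecConstantOp %uint Select %b %uint_1 %uint_0
		// ideally reduces to "uint(b)" so the result stays a spec-constant expression,
		// while anything we cannot reduce goes through the ternary path instead.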
4725 if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
4726 {
4727 // Implement as a simple cast down below.
4728 }
4729 else
4730 {
4731 // Implement a ternary and pray the compiler understands it :)
4732 return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
4733 }
4734 break;
4735 }
4736
4737 case OpVectorShuffle:
4738 {
4739 string expr = type_to_glsl_constructor(type);
4740 expr += "(";
4741
4742 uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
4743 string left_arg = to_enclosed_expression(cop.arguments[0]);
4744 string right_arg = to_enclosed_expression(cop.arguments[1]);
4745
4746 for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
4747 {
4748 uint32_t index = cop.arguments[i];
4749 if (index >= left_components)
4750 expr += right_arg + "." + "xyzw"[index - left_components];
4751 else
4752 expr += left_arg + "." + "xyzw"[index];
4753
4754 if (i + 1 < uint32_t(cop.arguments.size()))
4755 expr += ", ";
4756 }
4757
4758 expr += ")";
4759 return expr;
4760 }
4761
4762 case OpCompositeExtract:
4763 {
4764 auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
4765 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
4766 return expr;
4767 }
4768
4769 case OpCompositeInsert:
4770 SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
4771
4772 default:
4773 		// Some opcodes are unimplemented here; these are currently not possible to test from glslang.
4774 SPIRV_CROSS_THROW("Unimplemented spec constant op.");
4775 }
4776
4777 uint32_t bit_width = 0;
4778 if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4779 bit_width = expression_type(cop.arguments[0]).width;
4780
4781 SPIRType::BaseType input_type;
4782 bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
4783
4784 switch (cop.opcode)
4785 {
4786 case OpIEqual:
4787 case OpINotEqual:
4788 input_type = to_signed_basetype(bit_width);
4789 break;
4790
4791 case OpSLessThan:
4792 case OpSLessThanEqual:
4793 case OpSGreaterThan:
4794 case OpSGreaterThanEqual:
4795 case OpSMod:
4796 case OpSDiv:
4797 case OpShiftRightArithmetic:
4798 case OpSConvert:
4799 case OpSNegate:
4800 input_type = to_signed_basetype(bit_width);
4801 break;
4802
4803 case OpULessThan:
4804 case OpULessThanEqual:
4805 case OpUGreaterThan:
4806 case OpUGreaterThanEqual:
4807 case OpUMod:
4808 case OpUDiv:
4809 case OpShiftRightLogical:
4810 case OpUConvert:
4811 input_type = to_unsigned_basetype(bit_width);
4812 break;
4813
4814 default:
4815 input_type = type.basetype;
4816 break;
4817 }
4818
4819 #undef GLSL_BOP
4820 #undef GLSL_UOP
4821 if (binary)
4822 {
4823 if (cop.arguments.size() < 2)
4824 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4825
4826 string cast_op0;
4827 string cast_op1;
4828 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
4829 cop.arguments[1], skip_cast_if_equal_type);
4830
4831 if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
4832 {
4833 expected_type.basetype = input_type;
4834 auto expr = bitcast_glsl_op(type, expected_type);
4835 expr += '(';
4836 expr += join(cast_op0, " ", op, " ", cast_op1);
4837 expr += ')';
4838 return expr;
4839 }
4840 else
4841 return join("(", cast_op0, " ", op, " ", cast_op1, ")");
4842 }
4843 else if (unary)
4844 {
4845 if (cop.arguments.size() < 1)
4846 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4847
4848 // Auto-bitcast to result type as needed.
4849 // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
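		// Rough example of the assumed output shape: OpSNegate on a uint spec constant %x
		// with a signed result would come out as something like "(-int(x))",
		// i.e. the operand bitcast is folded directly into the unary expression.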
4850 return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
4851 }
4852 else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4853 {
4854 if (cop.arguments.size() < 1)
4855 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4856
4857 auto &arg_type = expression_type(cop.arguments[0]);
4858 if (arg_type.width < type.width && input_type != arg_type.basetype)
4859 {
4860 auto expected = arg_type;
4861 expected.basetype = input_type;
4862 return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
4863 }
4864 else
4865 return join(op, "(", to_expression(cop.arguments[0]), ")");
4866 }
4867 else
4868 {
4869 if (cop.arguments.size() < 1)
4870 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4871 return join(op, "(", to_expression(cop.arguments[0]), ")");
4872 }
4873 }
4874
4875 string CompilerGLSL::constant_expression(const SPIRConstant &c)
4876 {
4877 auto &type = get<SPIRType>(c.constant_type);
4878
4879 if (type.pointer)
4880 {
4881 return backend.null_pointer_literal;
4882 }
4883 else if (!c.subconstants.empty())
4884 {
4885 // Handles Arrays and structures.
4886 string res;
4887
4888 // Allow Metal to use the array<T> template to make arrays a value type
4889 		bool needs_trailing_bracket = false;
4890 if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
4891 type.array.empty())
4892 {
4893 res = type_to_glsl_constructor(type) + "{ ";
4894 }
4895 else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
4896 !type.array.empty())
4897 {
4898 res = type_to_glsl_constructor(type) + "({ ";
4899 			needs_trailing_bracket = true;
4900 }
4901 else if (backend.use_initializer_list)
4902 {
4903 res = "{ ";
4904 }
4905 else
4906 {
4907 res = type_to_glsl_constructor(type) + "(";
4908 }
4909
4910 for (auto &elem : c.subconstants)
4911 {
4912 auto &subc = get<SPIRConstant>(elem);
4913 if (subc.specialization)
4914 res += to_name(elem);
4915 else
4916 res += constant_expression(subc);
4917
4918 if (&elem != &c.subconstants.back())
4919 res += ", ";
4920 }
4921
4922 res += backend.use_initializer_list ? " }" : ")";
4923 		if (needs_trailing_bracket)
4924 res += ")";
4925
4926 return res;
4927 }
4928 else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
4929 {
4930 // Metal tessellation likes empty structs which are then constant expressions.
4931 if (backend.supports_empty_struct)
4932 return "{ }";
4933 else if (backend.use_typed_initializer_list)
4934 return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }");
4935 else if (backend.use_initializer_list)
4936 return "{ 0 }";
4937 else
4938 return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)");
4939 }
4940 else if (c.columns() == 1)
4941 {
4942 return constant_expression_vector(c, 0);
4943 }
4944 else
4945 {
4946 string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
4947 for (uint32_t col = 0; col < c.columns(); col++)
4948 {
4949 if (c.specialization_constant_id(col) != 0)
4950 res += to_name(c.specialization_constant_id(col));
4951 else
4952 res += constant_expression_vector(c, col);
4953
4954 if (col + 1 < c.columns())
4955 res += ", ";
4956 }
4957 res += ")";
4958 return res;
4959 }
4960 }
4961
4962 #ifdef _MSC_VER
4963 // sprintf warning.
4964 // We cannot rely on snprintf existing because, ..., MSVC.
4965 #pragma warning(push)
4966 #pragma warning(disable : 4996)
4967 #endif
4968
4969 string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
4970 {
4971 string res;
4972 float float_value = c.scalar_f16(col, row);
4973
4974 // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
4975 // of complicated workarounds, just value-cast to the half type always.
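	// For instance (assumed output shape), a finite half constant 1.5 is expected to appear as
	// "float16_t(1.5)", or whatever type_to_glsl() names the half type on the target,
	// while the non-finite cases below fall back to casts of 1.0 / 0.0 and 0.0 / 0.0.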
4976 if (std::isnan(float_value) || std::isinf(float_value))
4977 {
4978 SPIRType type;
4979 type.basetype = SPIRType::Half;
4980 type.vecsize = 1;
4981 type.columns = 1;
4982
4983 if (float_value == numeric_limits<float>::infinity())
4984 res = join(type_to_glsl(type), "(1.0 / 0.0)");
4985 else if (float_value == -numeric_limits<float>::infinity())
4986 res = join(type_to_glsl(type), "(-1.0 / 0.0)");
4987 else if (std::isnan(float_value))
4988 res = join(type_to_glsl(type), "(0.0 / 0.0)");
4989 else
4990 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
4991 }
4992 else
4993 {
4994 SPIRType type;
4995 type.basetype = SPIRType::Half;
4996 type.vecsize = 1;
4997 type.columns = 1;
4998 res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
4999 }
5000
5001 return res;
5002 }
5003
5004 string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5005 {
5006 string res;
5007 float float_value = c.scalar_f32(col, row);
5008
5009 if (std::isnan(float_value) || std::isinf(float_value))
5010 {
5011 // Use special representation.
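		// Roughly, +inf is then expected to appear as "uintBitsToFloat(0x7f800000u /* inf */)",
		// reusing the constant's raw bit pattern instead of a float literal.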
5012 if (!is_legacy())
5013 {
5014 SPIRType out_type;
5015 SPIRType in_type;
5016 out_type.basetype = SPIRType::Float;
5017 in_type.basetype = SPIRType::UInt;
5018 out_type.vecsize = 1;
5019 in_type.vecsize = 1;
5020 out_type.width = 32;
5021 in_type.width = 32;
5022
5023 char print_buffer[32];
5024 sprintf(print_buffer, "0x%xu", c.scalar(col, row));
5025
5026 const char *comment = "inf";
5027 if (float_value == -numeric_limits<float>::infinity())
5028 comment = "-inf";
5029 else if (std::isnan(float_value))
5030 comment = "nan";
5031 res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
5032 }
5033 else
5034 {
5035 if (float_value == numeric_limits<float>::infinity())
5036 {
5037 if (backend.float_literal_suffix)
5038 res = "(1.0f / 0.0f)";
5039 else
5040 res = "(1.0 / 0.0)";
5041 }
5042 else if (float_value == -numeric_limits<float>::infinity())
5043 {
5044 if (backend.float_literal_suffix)
5045 res = "(-1.0f / 0.0f)";
5046 else
5047 res = "(-1.0 / 0.0)";
5048 }
5049 else if (std::isnan(float_value))
5050 {
5051 if (backend.float_literal_suffix)
5052 res = "(0.0f / 0.0f)";
5053 else
5054 res = "(0.0 / 0.0)";
5055 }
5056 else
5057 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5058 }
5059 }
5060 else
5061 {
5062 res = convert_to_string(float_value, current_locale_radix_character);
5063 if (backend.float_literal_suffix)
5064 res += "f";
5065 }
5066
5067 return res;
5068 }
5069
5070 std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5071 {
5072 string res;
5073 double double_value = c.scalar_f64(col, row);
5074
5075 if (std::isnan(double_value) || std::isinf(double_value))
5076 {
5077 // Use special representation.
5078 if (!is_legacy())
5079 {
5080 SPIRType out_type;
5081 SPIRType in_type;
5082 out_type.basetype = SPIRType::Double;
5083 in_type.basetype = SPIRType::UInt64;
5084 out_type.vecsize = 1;
5085 in_type.vecsize = 1;
5086 out_type.width = 64;
5087 in_type.width = 64;
5088
5089 uint64_t u64_value = c.scalar_u64(col, row);
5090
5091 if (options.es)
5092 SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
5093 require_extension_internal("GL_ARB_gpu_shader_int64");
5094
5095 char print_buffer[64];
5096 sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
5097 backend.long_long_literal_suffix ? "ull" : "ul");
5098
5099 const char *comment = "inf";
5100 if (double_value == -numeric_limits<double>::infinity())
5101 comment = "-inf";
5102 else if (std::isnan(double_value))
5103 comment = "nan";
5104 res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
5105 }
5106 else
5107 {
5108 if (options.es)
5109 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
5110 if (options.version < 400)
5111 require_extension_internal("GL_ARB_gpu_shader_fp64");
5112
5113 if (double_value == numeric_limits<double>::infinity())
5114 {
5115 if (backend.double_literal_suffix)
5116 res = "(1.0lf / 0.0lf)";
5117 else
5118 res = "(1.0 / 0.0)";
5119 }
5120 else if (double_value == -numeric_limits<double>::infinity())
5121 {
5122 if (backend.double_literal_suffix)
5123 res = "(-1.0lf / 0.0lf)";
5124 else
5125 res = "(-1.0 / 0.0)";
5126 }
5127 else if (std::isnan(double_value))
5128 {
5129 if (backend.double_literal_suffix)
5130 res = "(0.0lf / 0.0lf)";
5131 else
5132 res = "(0.0 / 0.0)";
5133 }
5134 else
5135 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5136 }
5137 }
5138 else
5139 {
5140 res = convert_to_string(double_value, current_locale_radix_character);
5141 if (backend.double_literal_suffix)
5142 res += "lf";
5143 }
5144
5145 return res;
5146 }
5147
5148 #ifdef _MSC_VER
5149 #pragma warning(pop)
5150 #endif
5151
5152 string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
5153 {
5154 auto type = get<SPIRType>(c.constant_type);
5155 type.columns = 1;
5156
5157 auto scalar_type = type;
5158 scalar_type.vecsize = 1;
5159
5160 string res;
5161 bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
5162 bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
5163
5164 if (!type_is_floating_point(type))
5165 {
5166 // Cannot swizzle literal integers as a special case.
5167 swizzle_splat = false;
5168 }
5169
5170 if (splat || swizzle_splat)
5171 {
5172 // Cannot use constant splatting if we have specialization constants somewhere in the vector.
5173 for (uint32_t i = 0; i < c.vector_size(); i++)
5174 {
5175 if (c.specialization_constant_id(vector, i) != 0)
5176 {
5177 splat = false;
5178 swizzle_splat = false;
5179 break;
5180 }
5181 }
5182 }
5183
5184 if (splat || swizzle_splat)
5185 {
5186 if (type.width == 64)
5187 {
5188 uint64_t ident = c.scalar_u64(vector, 0);
5189 for (uint32_t i = 1; i < c.vector_size(); i++)
5190 {
5191 if (ident != c.scalar_u64(vector, i))
5192 {
5193 splat = false;
5194 swizzle_splat = false;
5195 break;
5196 }
5197 }
5198 }
5199 else
5200 {
5201 uint32_t ident = c.scalar(vector, 0);
5202 for (uint32_t i = 1; i < c.vector_size(); i++)
5203 {
5204 if (ident != c.scalar(vector, i))
5205 {
5206 splat = false;
5207 swizzle_splat = false;
5208 }
5209 }
5210 }
5211 }
5212
5213 if (c.vector_size() > 1 && !swizzle_splat)
5214 res += type_to_glsl(type) + "(";
5215
5216 switch (type.basetype)
5217 {
5218 case SPIRType::Half:
5219 if (splat || swizzle_splat)
5220 {
5221 res += convert_half_to_string(c, vector, 0);
5222 if (swizzle_splat)
5223 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5224 }
5225 else
5226 {
5227 for (uint32_t i = 0; i < c.vector_size(); i++)
5228 {
5229 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5230 res += to_name(c.specialization_constant_id(vector, i));
5231 else
5232 res += convert_half_to_string(c, vector, i);
5233
5234 if (i + 1 < c.vector_size())
5235 res += ", ";
5236 }
5237 }
5238 break;
5239
5240 case SPIRType::Float:
5241 if (splat || swizzle_splat)
5242 {
5243 res += convert_float_to_string(c, vector, 0);
5244 if (swizzle_splat)
5245 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5246 }
5247 else
5248 {
5249 for (uint32_t i = 0; i < c.vector_size(); i++)
5250 {
5251 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5252 res += to_name(c.specialization_constant_id(vector, i));
5253 else
5254 res += convert_float_to_string(c, vector, i);
5255
5256 if (i + 1 < c.vector_size())
5257 res += ", ";
5258 }
5259 }
5260 break;
5261
5262 case SPIRType::Double:
5263 if (splat || swizzle_splat)
5264 {
5265 res += convert_double_to_string(c, vector, 0);
5266 if (swizzle_splat)
5267 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5268 }
5269 else
5270 {
5271 for (uint32_t i = 0; i < c.vector_size(); i++)
5272 {
5273 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5274 res += to_name(c.specialization_constant_id(vector, i));
5275 else
5276 res += convert_double_to_string(c, vector, i);
5277
5278 if (i + 1 < c.vector_size())
5279 res += ", ";
5280 }
5281 }
5282 break;
5283
5284 case SPIRType::Int64:
5285 if (splat)
5286 {
5287 res += convert_to_string(c.scalar_i64(vector, 0));
5288 if (backend.long_long_literal_suffix)
5289 res += "ll";
5290 else
5291 res += "l";
5292 }
5293 else
5294 {
5295 for (uint32_t i = 0; i < c.vector_size(); i++)
5296 {
5297 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5298 res += to_name(c.specialization_constant_id(vector, i));
5299 else
5300 {
5301 res += convert_to_string(c.scalar_i64(vector, i));
5302 if (backend.long_long_literal_suffix)
5303 res += "ll";
5304 else
5305 res += "l";
5306 }
5307
5308 if (i + 1 < c.vector_size())
5309 res += ", ";
5310 }
5311 }
5312 break;
5313
5314 case SPIRType::UInt64:
5315 if (splat)
5316 {
5317 res += convert_to_string(c.scalar_u64(vector, 0));
5318 if (backend.long_long_literal_suffix)
5319 res += "ull";
5320 else
5321 res += "ul";
5322 }
5323 else
5324 {
5325 for (uint32_t i = 0; i < c.vector_size(); i++)
5326 {
5327 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5328 res += to_name(c.specialization_constant_id(vector, i));
5329 else
5330 {
5331 res += convert_to_string(c.scalar_u64(vector, i));
5332 if (backend.long_long_literal_suffix)
5333 res += "ull";
5334 else
5335 res += "ul";
5336 }
5337
5338 if (i + 1 < c.vector_size())
5339 res += ", ";
5340 }
5341 }
5342 break;
5343
5344 case SPIRType::UInt:
5345 if (splat)
5346 {
5347 res += convert_to_string(c.scalar(vector, 0));
5348 if (is_legacy())
5349 {
5350 // Fake unsigned constant literals with signed ones if possible.
5351 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
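				// E.g. a uint constant 7u would be emitted as plain "7" on legacy targets (sketch);
				// anything with the sign bit set cannot be faked this way and throws below.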
5352 if (c.scalar_i32(vector, 0) < 0)
5353 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
5354 }
5355 else if (backend.uint32_t_literal_suffix)
5356 res += "u";
5357 }
5358 else
5359 {
5360 for (uint32_t i = 0; i < c.vector_size(); i++)
5361 {
5362 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5363 res += to_name(c.specialization_constant_id(vector, i));
5364 else
5365 {
5366 res += convert_to_string(c.scalar(vector, i));
5367 if (is_legacy())
5368 {
5369 // Fake unsigned constant literals with signed ones if possible.
5370 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5371 if (c.scalar_i32(vector, i) < 0)
5372 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
5373 "the literal negative.");
5374 }
5375 else if (backend.uint32_t_literal_suffix)
5376 res += "u";
5377 }
5378
5379 if (i + 1 < c.vector_size())
5380 res += ", ";
5381 }
5382 }
5383 break;
5384
5385 case SPIRType::Int:
5386 if (splat)
5387 res += convert_to_string(c.scalar_i32(vector, 0));
5388 else
5389 {
5390 for (uint32_t i = 0; i < c.vector_size(); i++)
5391 {
5392 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5393 res += to_name(c.specialization_constant_id(vector, i));
5394 else
5395 res += convert_to_string(c.scalar_i32(vector, i));
5396 if (i + 1 < c.vector_size())
5397 res += ", ";
5398 }
5399 }
5400 break;
5401
5402 case SPIRType::UShort:
5403 if (splat)
5404 {
5405 res += convert_to_string(c.scalar(vector, 0));
5406 }
5407 else
5408 {
5409 for (uint32_t i = 0; i < c.vector_size(); i++)
5410 {
5411 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5412 res += to_name(c.specialization_constant_id(vector, i));
5413 else
5414 {
5415 if (*backend.uint16_t_literal_suffix)
5416 {
5417 res += convert_to_string(c.scalar_u16(vector, i));
5418 res += backend.uint16_t_literal_suffix;
5419 }
5420 else
5421 {
5422 // If backend doesn't have a literal suffix, we need to value cast.
5423 res += type_to_glsl(scalar_type);
5424 res += "(";
5425 res += convert_to_string(c.scalar_u16(vector, i));
5426 res += ")";
5427 }
5428 }
5429
5430 if (i + 1 < c.vector_size())
5431 res += ", ";
5432 }
5433 }
5434 break;
5435
5436 case SPIRType::Short:
5437 if (splat)
5438 {
5439 res += convert_to_string(c.scalar_i16(vector, 0));
5440 }
5441 else
5442 {
5443 for (uint32_t i = 0; i < c.vector_size(); i++)
5444 {
5445 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5446 res += to_name(c.specialization_constant_id(vector, i));
5447 else
5448 {
5449 if (*backend.int16_t_literal_suffix)
5450 {
5451 res += convert_to_string(c.scalar_i16(vector, i));
5452 res += backend.int16_t_literal_suffix;
5453 }
5454 else
5455 {
5456 // If backend doesn't have a literal suffix, we need to value cast.
5457 res += type_to_glsl(scalar_type);
5458 res += "(";
5459 res += convert_to_string(c.scalar_i16(vector, i));
5460 res += ")";
5461 }
5462 }
5463
5464 if (i + 1 < c.vector_size())
5465 res += ", ";
5466 }
5467 }
5468 break;
5469
5470 case SPIRType::UByte:
5471 if (splat)
5472 {
5473 res += convert_to_string(c.scalar_u8(vector, 0));
5474 }
5475 else
5476 {
5477 for (uint32_t i = 0; i < c.vector_size(); i++)
5478 {
5479 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5480 res += to_name(c.specialization_constant_id(vector, i));
5481 else
5482 {
5483 res += type_to_glsl(scalar_type);
5484 res += "(";
5485 res += convert_to_string(c.scalar_u8(vector, i));
5486 res += ")";
5487 }
5488
5489 if (i + 1 < c.vector_size())
5490 res += ", ";
5491 }
5492 }
5493 break;
5494
5495 case SPIRType::SByte:
5496 if (splat)
5497 {
5498 res += convert_to_string(c.scalar_i8(vector, 0));
5499 }
5500 else
5501 {
5502 for (uint32_t i = 0; i < c.vector_size(); i++)
5503 {
5504 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5505 res += to_name(c.specialization_constant_id(vector, i));
5506 else
5507 {
5508 res += type_to_glsl(scalar_type);
5509 res += "(";
5510 res += convert_to_string(c.scalar_i8(vector, i));
5511 res += ")";
5512 }
5513
5514 if (i + 1 < c.vector_size())
5515 res += ", ";
5516 }
5517 }
5518 break;
5519
5520 case SPIRType::Boolean:
5521 if (splat)
5522 res += c.scalar(vector, 0) ? "true" : "false";
5523 else
5524 {
5525 for (uint32_t i = 0; i < c.vector_size(); i++)
5526 {
5527 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5528 res += to_name(c.specialization_constant_id(vector, i));
5529 else
5530 res += c.scalar(vector, i) ? "true" : "false";
5531
5532 if (i + 1 < c.vector_size())
5533 res += ", ";
5534 }
5535 }
5536 break;
5537
5538 default:
5539 SPIRV_CROSS_THROW("Invalid constant expression basetype.");
5540 }
5541
5542 if (c.vector_size() > 1 && !swizzle_splat)
5543 res += ")";
5544
5545 return res;
5546 }
5547
5548 SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
5549 {
5550 forced_temporaries.insert(id);
5551 emit_uninitialized_temporary(type, id);
5552 return set<SPIRExpression>(id, to_name(id), type, true);
5553 }
5554
5555 void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
5556 {
5557 // If we're declaring temporaries inside continue blocks,
5558 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
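	// Sketch of the assumed effect on the output: the declaration ends up in the loop
	// dominator's header, e.g. "int _20; for (;;) { ... }" (hypothetical name), so the
	// continue block only assigns to the temporary and never declares anything itself.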
5559 if (current_continue_block && !hoisted_temporaries.count(result_id))
5560 {
5561 auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5562 if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5563 [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5564 return tmp.first == result_type && tmp.second == result_id;
5565 }) == end(header.declare_temporary))
5566 {
5567 header.declare_temporary.emplace_back(result_type, result_id);
5568 hoisted_temporaries.insert(result_id);
5569 force_recompile();
5570 }
5571 }
5572 else if (hoisted_temporaries.count(result_id) == 0)
5573 {
5574 auto &type = get<SPIRType>(result_type);
5575 auto &flags = ir.meta[result_id].decoration.decoration_flags;
5576
5577 // The result_id has not been made into an expression yet, so use flags interface.
5578 add_local_variable_name(result_id);
5579
5580 string initializer;
5581 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
5582 initializer = join(" = ", to_zero_initialized_expression(result_type));
5583
5584 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
5585 }
5586 }
5587
5588 string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
5589 {
5590 auto &type = get<SPIRType>(result_type);
5591 auto &flags = ir.meta[result_id].decoration.decoration_flags;
5592
5593 // If we're declaring temporaries inside continue blocks,
5594 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5595 if (current_continue_block && !hoisted_temporaries.count(result_id))
5596 {
5597 auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5598 if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5599 [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5600 return tmp.first == result_type && tmp.second == result_id;
5601 }) == end(header.declare_temporary))
5602 {
5603 header.declare_temporary.emplace_back(result_type, result_id);
5604 hoisted_temporaries.insert(result_id);
5605 force_recompile();
5606 }
5607
5608 return join(to_name(result_id), " = ");
5609 }
5610 else if (hoisted_temporaries.count(result_id))
5611 {
5612 // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
5613 return join(to_name(result_id), " = ");
5614 }
5615 else
5616 {
5617 // The result_id has not been made into an expression yet, so use flags interface.
5618 add_local_variable_name(result_id);
5619 return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
5620 }
5621 }
5622
5623 bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
5624 {
5625 return forwarded_temporaries.count(id) != 0;
5626 }
5627
5628 bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
5629 {
5630 return suppressed_usage_tracking.count(id) != 0;
5631 }
5632
5633 bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
5634 {
5635 auto *expr = maybe_get<SPIRExpression>(id);
5636 if (!expr)
5637 return false;
5638
5639 // If we're emitting code at a deeper loop level than when we emitted the expression,
5640 // we're probably reading the same expression over and over.
5641 return current_loop_level > expr->emitted_loop_level;
5642 }
5643
5644 SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
5645 bool suppress_usage_tracking)
5646 {
5647 if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
5648 {
5649 // Just forward it without temporary.
5650 // If the forward is trivial, we do not force flushing to temporary for this expression.
5651 forwarded_temporaries.insert(result_id);
5652 if (suppress_usage_tracking)
5653 suppressed_usage_tracking.insert(result_id);
5654
5655 return set<SPIRExpression>(result_id, rhs, result_type, true);
5656 }
5657 else
5658 {
5659 // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
5660 statement(declare_temporary(result_type, result_id), rhs, ";");
5661 return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
5662 }
5663 }
5664
5665 void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5666 {
5667 bool forward = should_forward(op0);
5668 emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
5669 inherit_expression_dependencies(result_id, op0);
5670 }
5671
5672 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
5673 {
5674 // Various FP arithmetic opcodes such as add, sub, mul will hit this.
5675 bool force_temporary_precise = backend.support_precise_qualifier &&
5676 has_decoration(result_id, DecorationNoContraction) &&
5677 type_is_floating_point(get<SPIRType>(result_type));
5678 bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;
5679
5680 emit_op(result_type, result_id,
5681 join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
5682
5683 inherit_expression_dependencies(result_id, op0);
5684 inherit_expression_dependencies(result_id, op1);
5685 }
5686
5687 void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
5688 {
5689 auto &type = get<SPIRType>(result_type);
5690 auto expr = type_to_glsl_constructor(type);
5691 expr += '(';
5692 for (uint32_t i = 0; i < type.vecsize; i++)
5693 {
5694 // Make sure to call to_expression multiple times to ensure
5695 // that these expressions are properly flushed to temporaries if needed.
5696 expr += op;
5697 expr += to_extract_component_expression(operand, i);
5698
5699 if (i + 1 < type.vecsize)
5700 expr += ", ";
5701 }
5702 expr += ')';
5703 emit_op(result_type, result_id, expr, should_forward(operand));
5704
5705 inherit_expression_dependencies(result_id, operand);
5706 }
5707
5708 void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5709 const char *op, bool negate, SPIRType::BaseType expected_type)
5710 {
5711 auto &type0 = expression_type(op0);
5712 auto &type1 = expression_type(op1);
5713
5714 SPIRType target_type0 = type0;
5715 SPIRType target_type1 = type1;
5716 target_type0.basetype = expected_type;
5717 target_type1.basetype = expected_type;
5718 target_type0.vecsize = 1;
5719 target_type1.vecsize = 1;
5720
5721 auto &type = get<SPIRType>(result_type);
5722 auto expr = type_to_glsl_constructor(type);
5723 expr += '(';
5724 for (uint32_t i = 0; i < type.vecsize; i++)
5725 {
5726 // Make sure to call to_expression multiple times to ensure
5727 // that these expressions are properly flushed to temporaries if needed.
5728 if (negate)
5729 expr += "!(";
5730
5731 if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
5732 expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
5733 else
5734 expr += to_extract_component_expression(op0, i);
5735
5736 expr += ' ';
5737 expr += op;
5738 expr += ' ';
5739
5740 if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
5741 expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
5742 else
5743 expr += to_extract_component_expression(op1, i);
5744
5745 if (negate)
5746 expr += ")";
5747
5748 if (i + 1 < type.vecsize)
5749 expr += ", ";
5750 }
5751 expr += ')';
5752 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5753
5754 inherit_expression_dependencies(result_id, op0);
5755 inherit_expression_dependencies(result_id, op1);
5756 }
5757
5758 SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
5759 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
5760 {
5761 auto &type0 = expression_type(op0);
5762 auto &type1 = expression_type(op1);
5763
5764 // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
5765 // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
5766 // since equality test is exactly the same.
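	// For example (sketch): OpSLessThan on two uint operands is expected to come out roughly as
	// "int(a) < int(b)", while OpIEqual with two operands of the same type compares them directly,
	// since equality is sign-invariant.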
5767 bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
5768
5769 // Create a fake type so we can bitcast to it.
5770 // We only deal with regular arithmetic types here like int, uints and so on.
5771 SPIRType expected_type;
5772 expected_type.basetype = input_type;
5773 expected_type.vecsize = type0.vecsize;
5774 expected_type.columns = type0.columns;
5775 expected_type.width = type0.width;
5776
5777 if (cast)
5778 {
5779 cast_op0 = bitcast_glsl(expected_type, op0);
5780 cast_op1 = bitcast_glsl(expected_type, op1);
5781 }
5782 else
5783 {
5784 // If we don't cast, our actual input type is that of the first (or second) argument.
5785 cast_op0 = to_enclosed_unpacked_expression(op0);
5786 cast_op1 = to_enclosed_unpacked_expression(op1);
5787 input_type = type0.basetype;
5788 }
5789
5790 return expected_type;
5791 }
5792
5793 bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
5794 {
5795 // Some bitcasts may require complex casting sequences, and are implemented here.
5796 	// Otherwise a simple unary function with bitcast_glsl_op will do.
5797
5798 auto &output_type = get<SPIRType>(result_type);
5799 auto &input_type = expression_type(op0);
5800 string expr;
5801
5802 if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
5803 expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
5804 else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
5805 input_type.vecsize == 2)
5806 expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
5807 else
5808 return false;
5809
5810 emit_op(result_type, id, expr, should_forward(op0));
5811 return true;
5812 }
5813
5814 void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5815 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
5816 {
5817 string cast_op0, cast_op1;
5818 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
5819 auto &out_type = get<SPIRType>(result_type);
5820
5821 // We might have casted away from the result type, so bitcast again.
5822 // For example, arithmetic right shift with uint inputs.
5823 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
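	// Sketch of the assumed result: an arithmetic right shift with uint operands and a uint result
	// would come out roughly as "uint(int(a) >> int(b))", where the outer cast restores the
	// declared result type.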
5824 string expr;
5825 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
5826 {
5827 expected_type.basetype = input_type;
5828 expr = bitcast_glsl_op(out_type, expected_type);
5829 expr += '(';
5830 expr += join(cast_op0, " ", op, " ", cast_op1);
5831 expr += ')';
5832 }
5833 else
5834 expr += join(cast_op0, " ", op, " ", cast_op1);
5835
5836 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5837 inherit_expression_dependencies(result_id, op0);
5838 inherit_expression_dependencies(result_id, op1);
5839 }
5840
5841 void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5842 {
5843 bool forward = should_forward(op0);
5844 emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
5845 inherit_expression_dependencies(result_id, op0);
5846 }
5847
5848 void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5849 const char *op)
5850 {
5851 bool forward = should_forward(op0) && should_forward(op1);
5852 emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
5853 forward);
5854 inherit_expression_dependencies(result_id, op0);
5855 inherit_expression_dependencies(result_id, op1);
5856 }
5857
5858 void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5859 const char *op)
5860 {
5861 forced_temporaries.insert(result_id);
5862 emit_op(result_type, result_id,
5863 join(op, "(", to_non_uniform_aware_expression(op0), ", ",
5864 to_unpacked_expression(op1), ")"), false);
5865 flush_all_atomic_capable_variables();
5866 }
5867
5868 void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
5869 uint32_t op0, uint32_t op1, uint32_t op2,
5870 const char *op)
5871 {
5872 forced_temporaries.insert(result_id);
5873 emit_op(result_type, result_id,
5874 join(op, "(", to_non_uniform_aware_expression(op0), ", ",
5875 to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false);
5876 flush_all_atomic_capable_variables();
5877 }
5878
5879 void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
5880 SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
5881 {
5882 auto &out_type = get<SPIRType>(result_type);
5883 auto &expr_type = expression_type(op0);
5884 auto expected_type = out_type;
5885
5886 // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
5887 expected_type.basetype = input_type;
5888 expected_type.width = expr_type.width;
5889
5890 string cast_op;
5891 if (expr_type.basetype != input_type)
5892 {
5893 if (expr_type.basetype == SPIRType::Boolean)
5894 cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")");
5895 else
5896 cast_op = bitcast_glsl(expected_type, op0);
5897 }
5898 else
5899 cast_op = to_unpacked_expression(op0);
5900
5901 string expr;
5902 if (out_type.basetype != expected_result_type)
5903 {
5904 expected_type.basetype = expected_result_type;
5905 expected_type.width = out_type.width;
5906 if (out_type.basetype == SPIRType::Boolean)
5907 expr = type_to_glsl(out_type);
5908 else
5909 expr = bitcast_glsl_op(out_type, expected_type);
5910 expr += '(';
5911 expr += join(op, "(", cast_op, ")");
5912 expr += ')';
5913 }
5914 else
5915 {
5916 expr += join(op, "(", cast_op, ")");
5917 }
5918
5919 emit_op(result_type, result_id, expr, should_forward(op0));
5920 inherit_expression_dependencies(result_id, op0);
5921 }
5922
5923 // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
5924 // and different vector sizes all at once. Need a special purpose method here.
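// A rough sketch of the intent (the operand types here are assumptions, not fixed by this function):
// a signed extract from a uvec2 base with int offset/count could end up as
//   uvec2(bitfieldExtract(ivec2(base), int(offset), int(count)))
// i.e. bitcast the base, value-cast the scalar offset/count, then bitcast the result back.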
5925 void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5926 uint32_t op2, const char *op,
5927 SPIRType::BaseType expected_result_type,
5928 SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
5929 SPIRType::BaseType input_type2)
5930 {
5931 auto &out_type = get<SPIRType>(result_type);
5932 auto expected_type = out_type;
5933 expected_type.basetype = input_type0;
5934
5935 string cast_op0 =
5936 expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5937
5938 auto op1_expr = to_unpacked_expression(op1);
5939 auto op2_expr = to_unpacked_expression(op2);
5940
5941 // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
5942 expected_type.basetype = input_type1;
5943 expected_type.vecsize = 1;
5944 string cast_op1 = expression_type(op1).basetype != input_type1 ?
5945 join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
5946 op1_expr;
5947
5948 expected_type.basetype = input_type2;
5949 expected_type.vecsize = 1;
5950 string cast_op2 = expression_type(op2).basetype != input_type2 ?
5951 join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
5952 op2_expr;
5953
5954 string expr;
5955 if (out_type.basetype != expected_result_type)
5956 {
5957 expected_type.vecsize = out_type.vecsize;
5958 expected_type.basetype = expected_result_type;
5959 expr = bitcast_glsl_op(out_type, expected_type);
5960 expr += '(';
5961 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5962 expr += ')';
5963 }
5964 else
5965 {
5966 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5967 }
5968
5969 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
5970 inherit_expression_dependencies(result_id, op0);
5971 inherit_expression_dependencies(result_id, op1);
5972 inherit_expression_dependencies(result_id, op2);
5973 }
5974
5975 void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5976 uint32_t op2, const char *op, SPIRType::BaseType input_type)
5977 {
5978 auto &out_type = get<SPIRType>(result_type);
5979 auto expected_type = out_type;
5980 expected_type.basetype = input_type;
5981 string cast_op0 =
5982 expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5983 string cast_op1 =
5984 expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
5985 string cast_op2 =
5986 expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
5987
5988 string expr;
5989 if (out_type.basetype != input_type)
5990 {
5991 expr = bitcast_glsl_op(out_type, expected_type);
5992 expr += '(';
5993 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5994 expr += ')';
5995 }
5996 else
5997 {
5998 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5999 }
6000
6001 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
6002 inherit_expression_dependencies(result_id, op0);
6003 inherit_expression_dependencies(result_id, op1);
6004 inherit_expression_dependencies(result_id, op2);
6005 }
6006
6007 void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
6008 uint32_t op1, const char *op, SPIRType::BaseType input_type)
6009 {
6010 // Special purpose method for implementing clustered subgroup opcodes.
6011 	// The main difference is that op1 does not participate in any casting; it needs to be a literal.
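	// Sketch, assuming the GL_KHR_shader_subgroup_clustered builtins: something like
	// "subgroupClusteredAdd(value, 4)", where the trailing 4 is the ClusterSize literal
	// and is passed through to_expression() untouched by any bitcast.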
6012 auto &out_type = get<SPIRType>(result_type);
6013 auto expected_type = out_type;
6014 expected_type.basetype = input_type;
6015 string cast_op0 =
6016 expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
6017
6018 string expr;
6019 if (out_type.basetype != input_type)
6020 {
6021 expr = bitcast_glsl_op(out_type, expected_type);
6022 expr += '(';
6023 expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
6024 expr += ')';
6025 }
6026 else
6027 {
6028 expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
6029 }
6030
6031 emit_op(result_type, result_id, expr, should_forward(op0));
6032 inherit_expression_dependencies(result_id, op0);
6033 }
6034
6035 void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6036 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
6037 {
6038 string cast_op0, cast_op1;
6039 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
6040 auto &out_type = get<SPIRType>(result_type);
6041
6042 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
6043 string expr;
6044 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
6045 {
6046 expected_type.basetype = input_type;
6047 expr = bitcast_glsl_op(out_type, expected_type);
6048 expr += '(';
6049 expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
6050 expr += ')';
6051 }
6052 else
6053 {
6054 expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
6055 }
6056
6057 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
6058 inherit_expression_dependencies(result_id, op0);
6059 inherit_expression_dependencies(result_id, op1);
6060 }
6061
6062 void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6063 uint32_t op2, const char *op)
6064 {
6065 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
6066 emit_op(result_type, result_id,
6067 join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
6068 to_unpacked_expression(op2), ")"),
6069 forward);
6070
6071 inherit_expression_dependencies(result_id, op0);
6072 inherit_expression_dependencies(result_id, op1);
6073 inherit_expression_dependencies(result_id, op2);
6074 }
6075
6076 void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6077 uint32_t op2, uint32_t op3, const char *op)
6078 {
6079 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
6080 emit_op(result_type, result_id,
6081 join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
6082 to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
6083 forward);
6084
6085 inherit_expression_dependencies(result_id, op0);
6086 inherit_expression_dependencies(result_id, op1);
6087 inherit_expression_dependencies(result_id, op2);
6088 inherit_expression_dependencies(result_id, op3);
6089 }
6090
6091 void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6092 uint32_t op2, uint32_t op3, const char *op,
6093 SPIRType::BaseType offset_count_type)
6094 {
6095 // Only need to cast offset/count arguments. Types of base/insert must be same as result type,
6096 // and bitfieldInsert is sign invariant.
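	// Sketch of the expected shape with 16-bit offset/count operands:
	//   bitfieldInsert(base, insert, int(offset), int(count))
	// Only the scalar offset/count are value-cast to the int that GLSL requires.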
6097 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
6098
6099 auto op0_expr = to_unpacked_expression(op0);
6100 auto op1_expr = to_unpacked_expression(op1);
6101 auto op2_expr = to_unpacked_expression(op2);
6102 auto op3_expr = to_unpacked_expression(op3);
6103
6104 SPIRType target_type;
6105 target_type.vecsize = 1;
6106 target_type.basetype = offset_count_type;
6107
6108 if (expression_type(op2).basetype != offset_count_type)
6109 {
6110 // Value-cast here. Input might be 16-bit. GLSL requires int.
6111 op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
6112 }
6113
6114 if (expression_type(op3).basetype != offset_count_type)
6115 {
6116 // Value-cast here. Input might be 16-bit. GLSL requires int.
6117 op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
6118 }
6119
6120 emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
6121 forward);
6122
6123 inherit_expression_dependencies(result_id, op0);
6124 inherit_expression_dependencies(result_id, op1);
6125 inherit_expression_dependencies(result_id, op2);
6126 inherit_expression_dependencies(result_id, op3);
6127 }
6128
6129 string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
6130 {
6131 const char *type;
6132 switch (imgtype.image.dim)
6133 {
6134 case spv::Dim1D:
6135 type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
6136 break;
6137 case spv::Dim2D:
6138 type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
6139 break;
6140 case spv::Dim3D:
6141 type = "3D";
6142 break;
6143 case spv::DimCube:
6144 type = "Cube";
6145 break;
6146 case spv::DimRect:
6147 type = "2DRect";
6148 break;
6149 case spv::DimBuffer:
6150 type = "Buffer";
6151 break;
6152 case spv::DimSubpassData:
6153 type = "2D";
6154 break;
6155 default:
6156 type = "";
6157 break;
6158 }
6159
6160 // In legacy GLSL, an extension is required for textureLod in the fragment
6161 // shader or textureGrad anywhere.
6162 bool legacy_lod_ext = false;
6163 auto &execution = get_entry_point();
6164 if (op == "textureGrad" || op == "textureProjGrad" ||
6165 ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
6166 {
6167 if (is_legacy_es())
6168 {
6169 legacy_lod_ext = true;
6170 require_extension_internal("GL_EXT_shader_texture_lod");
6171 }
6172 else if (is_legacy_desktop())
6173 require_extension_internal("GL_ARB_shader_texture_lod");
6174 }
6175
6176 if (op == "textureLodOffset" || op == "textureProjLodOffset")
6177 {
6178 if (is_legacy_es())
6179 SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
6180
6181 require_extension_internal("GL_EXT_gpu_shader4");
6182 }
6183
6184 // GLES has very limited support for shadow samplers.
6185 // Basically only shadow2D and shadow2DProj work, through EXT_shadow_samplers;
6186 // everything else will simply throw.
6187 bool is_comparison = image_is_comparison(imgtype, tex);
6188 if (is_comparison && is_legacy_es())
6189 {
6190 if (op == "texture" || op == "textureProj")
6191 require_extension_internal("GL_EXT_shadow_samplers");
6192 else
6193 SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
6194 }
6195
6196 if (op == "textureSize")
6197 {
6198 if (is_legacy_es())
6199 SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
6200 if (is_comparison)
6201 SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
6202 require_extension_internal("GL_EXT_gpu_shader4");
6203 }
6204
6205 if (op == "texelFetch" && is_legacy_es())
6206 SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
6207
6208 bool is_es_and_depth = is_legacy_es() && is_comparison;
6209 std::string type_prefix = is_comparison ? "shadow" : "texture";
6210
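// The legacy name is then assembled from prefix + dimension (+ extension suffix). For instance,
// "textureLod" on a non-shadow 2D sampler in a legacy ES fragment shader becomes "texture2DLodEXT",
// while "textureProj" on a depth sampler in legacy ES becomes "shadow2DProjEXT".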
6211 if (op == "texture")
6212 return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
6213 else if (op == "textureLod")
6214 return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
6215 else if (op == "textureProj")
6216 return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
6217 else if (op == "textureGrad")
6218 return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
6219 else if (op == "textureProjLod")
6220 return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
6221 else if (op == "textureLodOffset")
6222 return join(type_prefix, type, "LodOffset");
6223 else if (op == "textureProjGrad")
6224 return join(type_prefix, type,
6225 is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
6226 else if (op == "textureProjLodOffset")
6227 return join(type_prefix, type, "ProjLodOffset");
6228 else if (op == "textureSize")
6229 return join("textureSize", type);
6230 else if (op == "texelFetch")
6231 return join("texelFetch", type);
6232 else
6233 {
6234 SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
6235 }
6236 }
6237
6238 bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
6239 {
6240 auto *cleft = maybe_get<SPIRConstant>(left);
6241 auto *cright = maybe_get<SPIRConstant>(right);
6242 auto &lerptype = expression_type(lerp);
6243
6244 // If our targets aren't constants, we cannot use construction.
6245 if (!cleft || !cright)
6246 return false;
6247
6248 // If our targets are spec constants, we cannot use construction.
6249 if (cleft->specialization || cright->specialization)
6250 return false;
6251
6252 auto &value_type = get<SPIRType>(cleft->constant_type);
6253
6254 if (lerptype.basetype != SPIRType::Boolean)
6255 return false;
6256 if (value_type.basetype == SPIRType::Struct || is_array(value_type))
6257 return false;
6258 if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
6259 return false;
6260
6261 // If our bool selects between 0 and 1, we can cast from bool instead, i.e. use a trivial constructor.
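// E.g., selecting between constant 0 and constant 1 with a boolean lerp lets the whole mix
// collapse into a constructor cast such as int(cond) or vec2(cond).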
6262 bool ret = true;
6263 for (uint32_t col = 0; col < value_type.columns; col++)
6264 {
6265 for (uint32_t row = 0; row < value_type.vecsize; row++)
6266 {
6267 switch (type.basetype)
6268 {
6269 case SPIRType::Short:
6270 case SPIRType::UShort:
6271 ret = cleft->scalar_u16(col, row) == 0 && cright->scalar_u16(col, row) == 1;
6272 break;
6273
6274 case SPIRType::Int:
6275 case SPIRType::UInt:
6276 ret = cleft->scalar(col, row) == 0 && cright->scalar(col, row) == 1;
6277 break;
6278
6279 case SPIRType::Half:
6280 ret = cleft->scalar_f16(col, row) == 0.0f && cright->scalar_f16(col, row) == 1.0f;
6281 break;
6282
6283 case SPIRType::Float:
6284 ret = cleft->scalar_f32(col, row) == 0.0f && cright->scalar_f32(col, row) == 1.0f;
6285 break;
6286
6287 case SPIRType::Double:
6288 ret = cleft->scalar_f64(col, row) == 0.0 && cright->scalar_f64(col, row) == 1.0;
6289 break;
6290
6291 case SPIRType::Int64:
6292 case SPIRType::UInt64:
6293 ret = cleft->scalar_u64(col, row) == 0 && cright->scalar_u64(col, row) == 1;
6294 break;
6295
6296 default:
6297 return false;
6298 }
6299 }
6300
6301 if (!ret)
6302 break;
6303 }
6304
6305 if (ret)
6306 op = type_to_glsl_constructor(type);
6307 return ret;
6308 }
6309
6310 string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
6311 uint32_t false_value)
6312 {
6313 string expr;
6314 auto &lerptype = expression_type(select);
6315
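// Scalar selects map directly to "s ? a : b". GLSL has no component-wise ?:, so vector selects
// are expanded per component, e.g. vec2(s.x ? a.x : b.x, s.y ? a.y : b.y).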
6316 if (lerptype.vecsize == 1)
6317 expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
6318 to_enclosed_pointer_expression(false_value));
6319 else
6320 {
6321 auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
6322
6323 expr = type_to_glsl_constructor(restype);
6324 expr += "(";
6325 for (uint32_t i = 0; i < restype.vecsize; i++)
6326 {
6327 expr += swiz(select, i);
6328 expr += " ? ";
6329 expr += swiz(true_value, i);
6330 expr += " : ";
6331 expr += swiz(false_value, i);
6332 if (i + 1 < restype.vecsize)
6333 expr += ", ";
6334 }
6335 expr += ")";
6336 }
6337
6338 return expr;
6339 }
6340
6341 void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
6342 {
6343 auto &lerptype = expression_type(lerp);
6344 auto &restype = get<SPIRType>(result_type);
6345
6346 // If this results in a variable pointer, assume it may be written through.
6347 if (restype.pointer)
6348 {
6349 register_write(left);
6350 register_write(right);
6351 }
6352
6353 string mix_op;
6354 bool has_boolean_mix = *backend.boolean_mix_function &&
6355 ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
6356 bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
6357
6358 // Cannot use boolean mix when the lerp argument is just one boolean;
6359 // fall back to regular ternary expressions.
6360 if (lerptype.vecsize == 1)
6361 has_boolean_mix = false;
6362
6363 // If we can reduce the mix to a simple cast, do so.
6364 // This helps for cases like int(bool), uint(bool) which is implemented with
6365 // OpSelect bool 1 0.
6366 if (trivial_mix)
6367 {
6368 emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
6369 }
6370 else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
6371 {
6372 // Boolean mix not supported on desktop without extension.
6373 // Was added in OpenGL 4.5 with ES 3.1 compat.
6374 //
6375 // Could use GL_EXT_shader_integer_mix on desktop at least,
6376 // but Apple doesn't support it. :(
6377 // Just implement it as ternary expressions.
6378 auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
6379 emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
6380 inherit_expression_dependencies(id, left);
6381 inherit_expression_dependencies(id, right);
6382 inherit_expression_dependencies(id, lerp);
6383 }
6384 else if (lerptype.basetype == SPIRType::Boolean)
6385 emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
6386 else
6387 emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
6388 }
6389
6390 string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
6391 {
6392 // Keep track of the array indices we have used to load the image.
6393 // We'll need to use the same array index into the combined image sampler array.
6394 auto image_expr = to_non_uniform_aware_expression(image_id);
6395 string array_expr;
6396 auto array_index = image_expr.find_first_of('[');
6397 if (array_index != string::npos)
6398 array_expr = image_expr.substr(array_index, string::npos);
6399
6400 auto &args = current_function->arguments;
6401
6402 // For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler)
6403 // and redirect each combination to a new sampler2D uniform.
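// E.g., every (texture2D, sampler) pair that is sampled together resolves to a single combined
// sampler2D uniform that was registered up front via build_combined_image_samplers().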
6404 auto *image = maybe_get_backing_variable(image_id);
6405 auto *samp = maybe_get_backing_variable(samp_id);
6406 if (image)
6407 image_id = image->self;
6408 if (samp)
6409 samp_id = samp->self;
6410
6411 auto image_itr = find_if(begin(args), end(args),
6412 [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
6413
6414 auto sampler_itr = find_if(begin(args), end(args),
6415 [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
6416
6417 if (image_itr != end(args) || sampler_itr != end(args))
6418 {
6419 // If the image or sampler originates from a function parameter, we will find it in our argument list.
6420 bool global_image = image_itr == end(args);
6421 bool global_sampler = sampler_itr == end(args);
6422 VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
6423 VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
6424
6425 auto &combined = current_function->combined_parameters;
6426 auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
6427 return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
6428 p.sampler_id == sid;
6429 });
6430
6431 if (itr != end(combined))
6432 return to_expression(itr->id) + array_expr;
6433 else
6434 {
6435 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
6436 "build_combined_image_samplers() used "
6437 "before compile() was called?");
6438 }
6439 }
6440 else
6441 {
6442 // For global sampler2D, look directly at the global remapping table.
6443 auto &mapping = combined_image_samplers;
6444 auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
6445 return combined.image_id == image_id && combined.sampler_id == samp_id;
6446 });
6447
6448 if (itr != end(combined_image_samplers))
6449 return to_expression(itr->combined_id) + array_expr;
6450 else
6451 {
6452 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
6453 "before compile() was called?");
6454 }
6455 }
6456 }
6457
6458 bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
6459 {
6460 switch (op)
6461 {
6462 case OpGroupNonUniformElect:
6463 case OpGroupNonUniformBallot:
6464 case OpGroupNonUniformBallotFindLSB:
6465 case OpGroupNonUniformBallotFindMSB:
6466 case OpGroupNonUniformBroadcast:
6467 case OpGroupNonUniformBroadcastFirst:
6468 case OpGroupNonUniformAll:
6469 case OpGroupNonUniformAny:
6470 case OpGroupNonUniformAllEqual:
6471 case OpControlBarrier:
6472 case OpMemoryBarrier:
6473 case OpGroupNonUniformBallotBitCount:
6474 case OpGroupNonUniformBallotBitExtract:
6475 case OpGroupNonUniformInverseBallot:
6476 return true;
6477 default:
6478 return false;
6479 }
6480 }
6481
6482 void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
6483 {
6484 if (options.vulkan_semantics && combined_image_samplers.empty())
6485 {
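// With Vulkan semantics the sampled image is just a constructor expression,
// e.g. sampler2D(<texture expr>, <sampler expr>).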
6486 emit_binary_func_op(result_type, result_id, image_id, samp_id,
6487 type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
6488 }
6489 else
6490 {
6491 // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
6492 emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
6493 }
6494
6495 // Make sure to suppress usage tracking and any expression invalidation.
6496 // It is illegal to create temporaries of opaque types.
6497 forwarded_temporaries.erase(result_id);
6498 }
6499
6500 static inline bool image_opcode_is_sample_no_dref(Op op)
6501 {
6502 switch (op)
6503 {
6504 case OpImageSampleExplicitLod:
6505 case OpImageSampleImplicitLod:
6506 case OpImageSampleProjExplicitLod:
6507 case OpImageSampleProjImplicitLod:
6508 case OpImageFetch:
6509 case OpImageRead:
6510 case OpImageSparseSampleExplicitLod:
6511 case OpImageSparseSampleImplicitLod:
6512 case OpImageSparseSampleProjExplicitLod:
6513 case OpImageSparseSampleProjImplicitLod:
6514 case OpImageSparseFetch:
6515 case OpImageSparseRead:
6516 return true;
6517
6518 default:
6519 return false;
6520 }
6521 }
6522
6523 void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
6524 uint32_t &texel_id)
6525 {
6526 // Need to allocate two temporaries.
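// One temporary receives the residency code, the other the texel; emit_texture_op() later
// repacks them into the two-member struct the SPIR-V result expects.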
6527 if (options.es)
6528 SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
6529 require_extension_internal("GL_ARB_sparse_texture2");
6530
6531 auto &temps = extra_sub_expressions[id];
6532 if (temps == 0)
6533 temps = ir.increase_bound_by(2);
6534
6535 feedback_id = temps + 0;
6536 texel_id = temps + 1;
6537
6538 auto &return_type = get<SPIRType>(result_type_id);
6539 if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
6540 SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
6541 emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
6542 emit_uninitialized_temporary(return_type.member_types[1], texel_id);
6543 }
6544
6545 uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
6546 {
6547 auto itr = extra_sub_expressions.find(id);
6548 if (itr == extra_sub_expressions.end())
6549 return 0;
6550 else
6551 return itr->second + 1;
6552 }
6553
6554 void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
6555 {
6556 auto *ops = stream(i);
6557 auto op = static_cast<Op>(i.op);
6558
6559 SmallVector<uint32_t> inherited_expressions;
6560
6561 uint32_t result_type_id = ops[0];
6562 uint32_t id = ops[1];
6563 auto &return_type = get<SPIRType>(result_type_id);
6564
6565 uint32_t sparse_code_id = 0;
6566 uint32_t sparse_texel_id = 0;
6567 if (sparse)
6568 emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);
6569
6570 bool forward = false;
6571 string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
6572
6573 if (sparse)
6574 {
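// The sparse*ARB builtin returns the residency code and writes the texel to an out parameter,
// so repack both into the SPIR-V result struct here.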
6575 statement(to_expression(sparse_code_id), " = ", expr, ";");
6576 expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
6577 ")");
6578 forward = true;
6579 inherited_expressions.clear();
6580 }
6581
6582 emit_op(result_type_id, id, expr, forward);
6583 for (auto &inherit : inherited_expressions)
6584 inherit_expression_dependencies(id, inherit);
6585
6586 // Do not register sparse ops as control dependent as they are always lowered to a temporary.
6587 switch (op)
6588 {
6589 case OpImageSampleDrefImplicitLod:
6590 case OpImageSampleImplicitLod:
6591 case OpImageSampleProjImplicitLod:
6592 case OpImageSampleProjDrefImplicitLod:
6593 register_control_dependent_expression(id);
6594 break;
6595
6596 default:
6597 break;
6598 }
6599 }
6600
6601 std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
6602 SmallVector<uint32_t> &inherited_expressions)
6603 {
6604 auto *ops = stream(i);
6605 auto op = static_cast<Op>(i.op);
6606 uint32_t length = i.length;
6607
6608 uint32_t result_type_id = ops[0];
6609 VariableID img = ops[2];
6610 uint32_t coord = ops[3];
6611 uint32_t dref = 0;
6612 uint32_t comp = 0;
6613 bool gather = false;
6614 bool proj = false;
6615 bool fetch = false;
6616 bool nonuniform_expression = false;
6617 const uint32_t *opt = nullptr;
6618
6619 auto &result_type = get<SPIRType>(result_type_id);
6620
6621 inherited_expressions.push_back(coord);
6622 if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img))
6623 nonuniform_expression = true;
6624
6625 switch (op)
6626 {
6627 case OpImageSampleDrefImplicitLod:
6628 case OpImageSampleDrefExplicitLod:
6629 case OpImageSparseSampleDrefImplicitLod:
6630 case OpImageSparseSampleDrefExplicitLod:
6631 dref = ops[4];
6632 opt = &ops[5];
6633 length -= 5;
6634 break;
6635
6636 case OpImageSampleProjDrefImplicitLod:
6637 case OpImageSampleProjDrefExplicitLod:
6638 case OpImageSparseSampleProjDrefImplicitLod:
6639 case OpImageSparseSampleProjDrefExplicitLod:
6640 dref = ops[4];
6641 opt = &ops[5];
6642 length -= 5;
6643 proj = true;
6644 break;
6645
6646 case OpImageDrefGather:
6647 case OpImageSparseDrefGather:
6648 dref = ops[4];
6649 opt = &ops[5];
6650 length -= 5;
6651 gather = true;
6652 if (options.es && options.version < 310)
6653 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6654 else if (!options.es && options.version < 400)
6655 SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
6656 break;
6657
6658 case OpImageGather:
6659 case OpImageSparseGather:
6660 comp = ops[4];
6661 opt = &ops[5];
6662 length -= 5;
6663 gather = true;
6664 if (options.es && options.version < 310)
6665 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6666 else if (!options.es && options.version < 400)
6667 {
6668 if (!expression_is_constant_null(comp))
6669 SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
6670 require_extension_internal("GL_ARB_texture_gather");
6671 }
6672 break;
6673
6674 case OpImageFetch:
6675 case OpImageSparseFetch:
6676 case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
6677 opt = &ops[4];
6678 length -= 4;
6679 fetch = true;
6680 break;
6681
6682 case OpImageSampleProjImplicitLod:
6683 case OpImageSampleProjExplicitLod:
6684 case OpImageSparseSampleProjImplicitLod:
6685 case OpImageSparseSampleProjExplicitLod:
6686 opt = &ops[4];
6687 length -= 4;
6688 proj = true;
6689 break;
6690
6691 default:
6692 opt = &ops[4];
6693 length -= 4;
6694 break;
6695 }
6696
6697 // Bypass pointers because we need the real image struct
6698 auto &type = expression_type(img);
6699 auto &imgtype = get<SPIRType>(type.self);
6700
6701 uint32_t coord_components = 0;
6702 switch (imgtype.image.dim)
6703 {
6704 case spv::Dim1D:
6705 coord_components = 1;
6706 break;
6707 case spv::Dim2D:
6708 coord_components = 2;
6709 break;
6710 case spv::Dim3D:
6711 coord_components = 3;
6712 break;
6713 case spv::DimCube:
6714 coord_components = 3;
6715 break;
6716 case spv::DimBuffer:
6717 coord_components = 1;
6718 break;
6719 default:
6720 coord_components = 2;
6721 break;
6722 }
6723
6724 if (dref)
6725 inherited_expressions.push_back(dref);
6726
6727 if (proj)
6728 coord_components++;
6729 if (imgtype.image.arrayed)
6730 coord_components++;
6731
6732 uint32_t bias = 0;
6733 uint32_t lod = 0;
6734 uint32_t grad_x = 0;
6735 uint32_t grad_y = 0;
6736 uint32_t coffset = 0;
6737 uint32_t offset = 0;
6738 uint32_t coffsets = 0;
6739 uint32_t sample = 0;
6740 uint32_t minlod = 0;
6741 uint32_t flags = 0;
6742
6743 if (length)
6744 {
6745 flags = *opt++;
6746 length--;
6747 }
6748
6749 auto test = [&](uint32_t &v, uint32_t flag) {
6750 if (length && (flags & flag))
6751 {
6752 v = *opt++;
6753 inherited_expressions.push_back(v);
6754 length--;
6755 }
6756 };
6757
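// Optional image operands are consumed in SPIR-V ImageOperands bit order:
// Bias, Lod, Grad (dx then dy), ConstOffset, Offset, ConstOffsets, Sample, MinLod.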
6758 test(bias, ImageOperandsBiasMask);
6759 test(lod, ImageOperandsLodMask);
6760 test(grad_x, ImageOperandsGradMask);
6761 test(grad_y, ImageOperandsGradMask);
6762 test(coffset, ImageOperandsConstOffsetMask);
6763 test(offset, ImageOperandsOffsetMask);
6764 test(coffsets, ImageOperandsConstOffsetsMask);
6765 test(sample, ImageOperandsSampleMask);
6766 test(minlod, ImageOperandsMinLodMask);
6767
6768 TextureFunctionBaseArguments base_args = {};
6769 base_args.img = img;
6770 base_args.imgtype = &imgtype;
6771 base_args.is_fetch = fetch != 0;
6772 base_args.is_gather = gather != 0;
6773 base_args.is_proj = proj != 0;
6774
6775 string expr;
6776 TextureFunctionNameArguments name_args = {};
6777
6778 name_args.base = base_args;
6779 name_args.has_array_offsets = coffsets != 0;
6780 name_args.has_offset = coffset != 0 || offset != 0;
6781 name_args.has_grad = grad_x != 0 || grad_y != 0;
6782 name_args.has_dref = dref != 0;
6783 name_args.is_sparse_feedback = sparse;
6784 name_args.has_min_lod = minlod != 0;
6785 name_args.lod = lod;
6786 expr += to_function_name(name_args);
6787 expr += "(";
6788
6789 uint32_t sparse_texel_id = 0;
6790 if (sparse)
6791 sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
6792
6793 TextureFunctionArguments args = {};
6794 args.base = base_args;
6795 args.coord = coord;
6796 args.coord_components = coord_components;
6797 args.dref = dref;
6798 args.grad_x = grad_x;
6799 args.grad_y = grad_y;
6800 args.lod = lod;
6801 args.coffset = coffset;
6802 args.offset = offset;
6803 args.bias = bias;
6804 args.component = comp;
6805 args.sample = sample;
6806 args.sparse_texel = sparse_texel_id;
6807 args.min_lod = minlod;
6808 args.nonuniform_expression = nonuniform_expression;
6809 expr += to_function_args(args, forward);
6810 expr += ")";
6811
6812 // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
6813 if (is_legacy() && image_is_comparison(imgtype, img))
6814 expr += ".r";
6815
6816 // Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
6817 // Remap back to 4 components as sampling opcodes expect.
6818 if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
6819 {
6820 bool image_is_depth = false;
6821 const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
6822 VariableID image_id = combined ? combined->image : img;
6823
6824 if (combined && image_is_comparison(imgtype, combined->image))
6825 image_is_depth = true;
6826 else if (image_is_comparison(imgtype, img))
6827 image_is_depth = true;
6828
6829 // We must also check the backing variable for the image.
6830 // We might have loaded an OpImage, and used that handle for two different purposes.
6831 // Once with comparison, once without.
6832 auto *image_variable = maybe_get_backing_variable(image_id);
6833 if (image_variable && image_is_comparison(get<SPIRType>(image_variable->basetype), image_variable->self))
6834 image_is_depth = true;
6835
6836 if (image_is_depth)
6837 expr = remap_swizzle(result_type, 1, expr);
6838 }
6839
6840 if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
6841 {
6842 // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
6843 // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
6844 expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
6845 }
6846
6847 // Deals with reads from MSL. We might need to downconvert to fewer components.
6848 if (op == OpImageRead)
6849 expr = remap_swizzle(result_type, 4, expr);
6850
6851 return expr;
6852 }
6853
6854 bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
6855 {
6856 auto *c = maybe_get<SPIRConstant>(id);
6857 if (!c)
6858 return false;
6859 return c->constant_is_null();
6860 }
6861
6862 bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
6863 {
6864 auto &type = expression_type(ptr);
6865 if (type.array.empty())
6866 return false;
6867
6868 if (!backend.array_is_value_type)
6869 return true;
6870
6871 auto *var = maybe_get_backing_variable(ptr);
6872 if (!var)
6873 return false;
6874
6875 auto &backed_type = get<SPIRType>(var->basetype);
6876 return !backend.buffer_offset_array_is_value_type && backed_type.basetype == SPIRType::Struct &&
6877 has_member_decoration(backed_type.self, 0, DecorationOffset);
6878 }
6879
6880 // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
6881 // For some subclasses, the function is a method on the specified image.
6882 string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
6883 {
6884 if (args.has_min_lod)
6885 {
6886 if (options.es)
6887 SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
6888 require_extension_internal("GL_ARB_sparse_texture_clamp");
6889 }
6890
6891 string fname;
6892 auto &imgtype = *args.base.imgtype;
6893 VariableID tex = args.base.img;
6894
6895 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
6896 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
6897 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
6898 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
6899 bool workaround_lod_array_shadow_as_grad = false;
6900 if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
6901 image_is_comparison(imgtype, tex) && args.lod)
6902 {
6903 if (!expression_is_constant_null(args.lod))
6904 {
6905 SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
6906 "expressed in GLSL.");
6907 }
6908 workaround_lod_array_shadow_as_grad = true;
6909 }
6910
6911 if (args.is_sparse_feedback)
6912 fname += "sparse";
6913
6914 if (args.base.is_fetch)
6915 fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
6916 else
6917 {
6918 fname += args.is_sparse_feedback ? "Texture" : "texture";
6919
6920 if (args.base.is_gather)
6921 fname += "Gather";
6922 if (args.has_array_offsets)
6923 fname += "Offsets";
6924 if (args.base.is_proj)
6925 fname += "Proj";
6926 if (args.has_grad || workaround_lod_array_shadow_as_grad)
6927 fname += "Grad";
6928 if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
6929 fname += "Lod";
6930 }
6931
6932 if (args.has_offset)
6933 fname += "Offset";
6934
6935 if (args.has_min_lod)
6936 fname += "Clamp";
6937
6938 if (args.is_sparse_feedback || args.has_min_lod)
6939 fname += "ARB";
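// E.g., a sparse gather with a constant offset becomes "sparseTextureGatherOffsetARB",
// while a plain non-sparse fetch stays "texelFetch".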
6940
6941 return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
6942 }
6943
6944 std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
6945 {
6946 auto *var = maybe_get_backing_variable(id);
6947
6948 // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
6949 // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
6950 if (var)
6951 {
6952 auto &type = get<SPIRType>(var->basetype);
6953 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
6954 {
6955 if (options.vulkan_semantics)
6956 {
6957 if (dummy_sampler_id)
6958 {
6959 // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
6960 auto sampled_type = type;
6961 sampled_type.basetype = SPIRType::SampledImage;
6962 return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ",
6963 to_expression(dummy_sampler_id), ")");
6964 }
6965 else
6966 {
6967 // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
6968 require_extension_internal("GL_EXT_samplerless_texture_functions");
6969 }
6970 }
6971 else
6972 {
6973 if (!dummy_sampler_id)
6974 SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
6975 "build_dummy_sampler_for_combined_images() called?");
6976
6977 return to_combined_image_sampler(id, dummy_sampler_id);
6978 }
6979 }
6980 }
6981
6982 return to_non_uniform_aware_expression(id);
6983 }
6984
6985 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
6986 string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
6987 {
6988 VariableID img = args.base.img;
6989 auto &imgtype = *args.base.imgtype;
6990
6991 string farg_str;
6992 if (args.base.is_fetch)
6993 farg_str = convert_separate_image_to_expression(img);
6994 else
6995 farg_str = to_non_uniform_aware_expression(img);
6996
6997 if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
6998 {
6999 // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
7000 farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
7001 }
7002
7003 bool swizz_func = backend.swizzle_is_function;
7004 auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
7005 if (comps == in_comps)
7006 return "";
7007
7008 switch (comps)
7009 {
7010 case 1:
7011 return ".x";
7012 case 2:
7013 return swizz_func ? ".xy()" : ".xy";
7014 case 3:
7015 return swizz_func ? ".xyz()" : ".xyz";
7016 default:
7017 return "";
7018 }
7019 };
7020
7021 bool forward = should_forward(args.coord);
7022
7023 // The IR can give us more components than we need, so chop them off as needed.
7024 auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
7025 // Only enclose the UV expression if needed.
7026 auto coord_expr =
7027 (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
7028
7029 // texelFetch only takes int, not uint.
7030 auto &coord_type = expression_type(args.coord);
7031 if (coord_type.basetype == SPIRType::UInt)
7032 {
7033 auto expected_type = coord_type;
7034 expected_type.vecsize = args.coord_components;
7035 expected_type.basetype = SPIRType::Int;
7036 coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
7037 }
7038
7039 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
7040 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
7041 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
7042 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
7043 bool workaround_lod_array_shadow_as_grad =
7044 ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
7045 image_is_comparison(imgtype, img) && args.lod != 0;
7046
7047 if (args.dref)
7048 {
7049 forward = forward && should_forward(args.dref);
7050
7051 // SPIR-V splits dref and coordinate.
7052 if (args.base.is_gather ||
7053 args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
7054 {
7055 farg_str += ", ";
7056 farg_str += to_expression(args.coord);
7057 farg_str += ", ";
7058 farg_str += to_expression(args.dref);
7059 }
7060 else if (args.base.is_proj)
7061 {
7062 // Have to reshuffle so we get vec4(coord, dref, proj), special case.
7063 // Other shading languages split up the coordinate and compare value arguments, like SPIR-V does.
7064 // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
7065 farg_str += ", vec4(";
7066
7067 if (imgtype.image.dim == Dim1D)
7068 {
7069 // Could reuse coord_expr, but we will mess up the temporary usage checking.
7070 farg_str += to_enclosed_expression(args.coord) + ".x";
7071 farg_str += ", ";
7072 farg_str += "0.0, ";
7073 farg_str += to_expression(args.dref);
7074 farg_str += ", ";
7075 farg_str += to_enclosed_expression(args.coord) + ".y)";
7076 }
7077 else if (imgtype.image.dim == Dim2D)
7078 {
7079 // Could reuse coord_expr, but we will mess up the temporary usage checking.
7080 farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
7081 farg_str += ", ";
7082 farg_str += to_expression(args.dref);
7083 farg_str += ", ";
7084 farg_str += to_enclosed_expression(args.coord) + ".z)";
7085 }
7086 else
7087 SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
7088 }
7089 else
7090 {
7091 // Create a composite which merges coord/dref into a single vector.
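// E.g., a regular 2D comparison sample ends up roughly as texture(s, vec3(coord.xy, dref)),
// where s is a sampler2DShadow.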
7092 auto type = expression_type(args.coord);
7093 type.vecsize = args.coord_components + 1;
7094 farg_str += ", ";
7095 farg_str += type_to_glsl_constructor(type);
7096 farg_str += "(";
7097 farg_str += coord_expr;
7098 farg_str += ", ";
7099 farg_str += to_expression(args.dref);
7100 farg_str += ")";
7101 }
7102 }
7103 else
7104 {
7105 farg_str += ", ";
7106 farg_str += coord_expr;
7107 }
7108
7109 if (args.grad_x || args.grad_y)
7110 {
7111 forward = forward && should_forward(args.grad_x);
7112 forward = forward && should_forward(args.grad_y);
7113 farg_str += ", ";
7114 farg_str += to_expression(args.grad_x);
7115 farg_str += ", ";
7116 farg_str += to_expression(args.grad_y);
7117 }
7118
7119 if (args.lod)
7120 {
7121 if (workaround_lod_array_shadow_as_grad)
7122 {
7123 // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
7124 // Implementing this as plain texture() is not safe on some implementations.
7125 if (imgtype.image.dim == Dim2D)
7126 farg_str += ", vec2(0.0), vec2(0.0)";
7127 else if (imgtype.image.dim == DimCube)
7128 farg_str += ", vec3(0.0), vec3(0.0)";
7129 }
7130 else
7131 {
7132 forward = forward && should_forward(args.lod);
7133 farg_str += ", ";
7134
7135 auto &lod_expr_type = expression_type(args.lod);
7136
7137 // Lod expression for TexelFetch in GLSL must be int, and only int.
7138 if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
7139 lod_expr_type.basetype != SPIRType::Int)
7140 {
7141 farg_str += join("int(", to_expression(args.lod), ")");
7142 }
7143 else
7144 {
7145 farg_str += to_expression(args.lod);
7146 }
7147 }
7148 }
7149 else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
7150 {
7151 // The Lod argument is optional in OpImageFetch, but texelFetch requires a LOD value, so pick 0 as the default.
7152 farg_str += ", 0";
7153 }
7154
7155 if (args.coffset)
7156 {
7157 forward = forward && should_forward(args.coffset);
7158 farg_str += ", ";
7159 farg_str += to_expression(args.coffset);
7160 }
7161 else if (args.offset)
7162 {
7163 forward = forward && should_forward(args.offset);
7164 farg_str += ", ";
7165 farg_str += to_expression(args.offset);
7166 }
7167
7168 if (args.sample)
7169 {
7170 farg_str += ", ";
7171 farg_str += to_expression(args.sample);
7172 }
7173
7174 if (args.min_lod)
7175 {
7176 farg_str += ", ";
7177 farg_str += to_expression(args.min_lod);
7178 }
7179
7180 if (args.sparse_texel)
7181 {
7182 // The sparse texel output parameter comes after everything else, except that it goes before the optional component/bias arguments.
7183 farg_str += ", ";
7184 farg_str += to_expression(args.sparse_texel);
7185 }
7186
7187 if (args.bias)
7188 {
7189 forward = forward && should_forward(args.bias);
7190 farg_str += ", ";
7191 farg_str += to_expression(args.bias);
7192 }
7193
7194 if (args.component && !expression_is_constant_null(args.component))
7195 {
7196 forward = forward && should_forward(args.component);
7197 farg_str += ", ";
7198 auto &component_type = expression_type(args.component);
7199 if (component_type.basetype == SPIRType::Int)
7200 farg_str += to_expression(args.component);
7201 else
7202 farg_str += join("int(", to_expression(args.component), ")");
7203 }
7204
7205 *p_forward = forward;
7206
7207 return farg_str;
7208 }
7209
7210 void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
7211 {
7212 auto op = static_cast<GLSLstd450>(eop);
7213
7214 if (is_legacy() && is_unsigned_glsl_opcode(op))
7215 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
7216
7217 // If we need to do implicit bitcasts, make sure we do it with the correct type.
7218 uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
7219 auto int_type = to_signed_basetype(integer_width);
7220 auto uint_type = to_unsigned_basetype(integer_width);
7221
7222 switch (op)
7223 {
7224 // FP fiddling
7225 case GLSLstd450Round:
7226 if (!is_legacy())
7227 emit_unary_func_op(result_type, id, args[0], "round");
7228 else
7229 {
7230 auto op0 = to_enclosed_expression(args[0]);
7231 auto &op0_type = expression_type(args[0]);
7232 auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
7233 bool forward = should_forward(args[0]);
7234 emit_op(result_type, id, expr, forward);
7235 inherit_expression_dependencies(id, args[0]);
7236 }
7237 break;
7238
7239 case GLSLstd450RoundEven:
7240 if (!is_legacy())
7241 emit_unary_func_op(result_type, id, args[0], "roundEven");
7242 else if (!options.es)
7243 {
7244 // This extension provides round() with round-to-even semantics.
7245 require_extension_internal("GL_EXT_gpu_shader4");
7246 emit_unary_func_op(result_type, id, args[0], "round");
7247 }
7248 else
7249 SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
7250 break;
7251
7252 case GLSLstd450Trunc:
7253 emit_unary_func_op(result_type, id, args[0], "trunc");
7254 break;
7255 case GLSLstd450SAbs:
7256 emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
7257 break;
7258 case GLSLstd450FAbs:
7259 emit_unary_func_op(result_type, id, args[0], "abs");
7260 break;
7261 case GLSLstd450SSign:
7262 emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
7263 break;
7264 case GLSLstd450FSign:
7265 emit_unary_func_op(result_type, id, args[0], "sign");
7266 break;
7267 case GLSLstd450Floor:
7268 emit_unary_func_op(result_type, id, args[0], "floor");
7269 break;
7270 case GLSLstd450Ceil:
7271 emit_unary_func_op(result_type, id, args[0], "ceil");
7272 break;
7273 case GLSLstd450Fract:
7274 emit_unary_func_op(result_type, id, args[0], "fract");
7275 break;
7276 case GLSLstd450Radians:
7277 emit_unary_func_op(result_type, id, args[0], "radians");
7278 break;
7279 case GLSLstd450Degrees:
7280 emit_unary_func_op(result_type, id, args[0], "degrees");
7281 break;
7282 case GLSLstd450Fma:
7283 if ((!options.es && options.version < 400) || (options.es && options.version < 320))
7284 {
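// fma() requires GLSL 400 / ESSL 320; older targets get a plain a * b + c expansion instead.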
7285 auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
7286 to_enclosed_expression(args[2]));
7287
7288 emit_op(result_type, id, expr,
7289 should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
7290 for (uint32_t i = 0; i < 3; i++)
7291 inherit_expression_dependencies(id, args[i]);
7292 }
7293 else
7294 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
7295 break;
7296 case GLSLstd450Modf:
7297 register_call_out_argument(args[1]);
7298 forced_temporaries.insert(id);
7299 emit_binary_func_op(result_type, id, args[0], args[1], "modf");
7300 break;
7301
7302 case GLSLstd450ModfStruct:
7303 {
7304 auto &type = get<SPIRType>(result_type);
7305 emit_uninitialized_temporary_expression(result_type, id);
7306 statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
7307 to_expression(id), ".", to_member_name(type, 1), ");");
7308 break;
7309 }
7310
7311 // Minmax
7312 case GLSLstd450UMin:
7313 emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
7314 break;
7315
7316 case GLSLstd450SMin:
7317 emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
7318 break;
7319
7320 case GLSLstd450FMin:
7321 emit_binary_func_op(result_type, id, args[0], args[1], "min");
7322 break;
7323
7324 case GLSLstd450FMax:
7325 emit_binary_func_op(result_type, id, args[0], args[1], "max");
7326 break;
7327
7328 case GLSLstd450UMax:
7329 emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
7330 break;
7331
7332 case GLSLstd450SMax:
7333 emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
7334 break;
7335
7336 case GLSLstd450FClamp:
7337 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
7338 break;
7339
7340 case GLSLstd450UClamp:
7341 emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
7342 break;
7343
7344 case GLSLstd450SClamp:
7345 emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
7346 break;
7347
7348 // Trig
7349 case GLSLstd450Sin:
7350 emit_unary_func_op(result_type, id, args[0], "sin");
7351 break;
7352 case GLSLstd450Cos:
7353 emit_unary_func_op(result_type, id, args[0], "cos");
7354 break;
7355 case GLSLstd450Tan:
7356 emit_unary_func_op(result_type, id, args[0], "tan");
7357 break;
7358 case GLSLstd450Asin:
7359 emit_unary_func_op(result_type, id, args[0], "asin");
7360 break;
7361 case GLSLstd450Acos:
7362 emit_unary_func_op(result_type, id, args[0], "acos");
7363 break;
7364 case GLSLstd450Atan:
7365 emit_unary_func_op(result_type, id, args[0], "atan");
7366 break;
7367 case GLSLstd450Sinh:
7368 emit_unary_func_op(result_type, id, args[0], "sinh");
7369 break;
7370 case GLSLstd450Cosh:
7371 emit_unary_func_op(result_type, id, args[0], "cosh");
7372 break;
7373 case GLSLstd450Tanh:
7374 emit_unary_func_op(result_type, id, args[0], "tanh");
7375 break;
7376 case GLSLstd450Asinh:
7377 emit_unary_func_op(result_type, id, args[0], "asinh");
7378 break;
7379 case GLSLstd450Acosh:
7380 emit_unary_func_op(result_type, id, args[0], "acosh");
7381 break;
7382 case GLSLstd450Atanh:
7383 emit_unary_func_op(result_type, id, args[0], "atanh");
7384 break;
7385 case GLSLstd450Atan2:
7386 emit_binary_func_op(result_type, id, args[0], args[1], "atan");
7387 break;
7388
7389 // Exponentials
7390 case GLSLstd450Pow:
7391 emit_binary_func_op(result_type, id, args[0], args[1], "pow");
7392 break;
7393 case GLSLstd450Exp:
7394 emit_unary_func_op(result_type, id, args[0], "exp");
7395 break;
7396 case GLSLstd450Log:
7397 emit_unary_func_op(result_type, id, args[0], "log");
7398 break;
7399 case GLSLstd450Exp2:
7400 emit_unary_func_op(result_type, id, args[0], "exp2");
7401 break;
7402 case GLSLstd450Log2:
7403 emit_unary_func_op(result_type, id, args[0], "log2");
7404 break;
7405 case GLSLstd450Sqrt:
7406 emit_unary_func_op(result_type, id, args[0], "sqrt");
7407 break;
7408 case GLSLstd450InverseSqrt:
7409 emit_unary_func_op(result_type, id, args[0], "inversesqrt");
7410 break;
7411
7412 // Matrix math
7413 case GLSLstd450Determinant:
7414 emit_unary_func_op(result_type, id, args[0], "determinant");
7415 break;
7416 case GLSLstd450MatrixInverse:
7417 emit_unary_func_op(result_type, id, args[0], "inverse");
7418 break;
7419
7420 // Lerping
7421 case GLSLstd450FMix:
7422 case GLSLstd450IMix:
7423 {
7424 emit_mix_op(result_type, id, args[0], args[1], args[2]);
7425 break;
7426 }
7427 case GLSLstd450Step:
7428 emit_binary_func_op(result_type, id, args[0], args[1], "step");
7429 break;
7430 case GLSLstd450SmoothStep:
7431 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
7432 break;
7433
7434 // Packing
7435 case GLSLstd450Frexp:
7436 register_call_out_argument(args[1]);
7437 forced_temporaries.insert(id);
7438 emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
7439 break;
7440
7441 case GLSLstd450FrexpStruct:
7442 {
7443 auto &type = get<SPIRType>(result_type);
7444 emit_uninitialized_temporary_expression(result_type, id);
7445 statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
7446 to_expression(id), ".", to_member_name(type, 1), ");");
7447 break;
7448 }
7449
7450 case GLSLstd450Ldexp:
7451 {
7452 bool forward = should_forward(args[0]) && should_forward(args[1]);
7453
7454 auto op0 = to_unpacked_expression(args[0]);
7455 auto op1 = to_unpacked_expression(args[1]);
7456 auto &op1_type = expression_type(args[1]);
7457 if (op1_type.basetype != SPIRType::Int)
7458 {
7459 // Need a value cast here.
7460 auto target_type = op1_type;
7461 target_type.basetype = SPIRType::Int;
7462 op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
7463 }
7464
7465 auto expr = join("ldexp(", op0, ", ", op1, ")");
7466
7467 emit_op(result_type, id, expr, forward);
7468 inherit_expression_dependencies(id, args[0]);
7469 inherit_expression_dependencies(id, args[1]);
7470 break;
7471 }
7472
7473 case GLSLstd450PackSnorm4x8:
7474 emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
7475 break;
7476 case GLSLstd450PackUnorm4x8:
7477 emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
7478 break;
7479 case GLSLstd450PackSnorm2x16:
7480 emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
7481 break;
7482 case GLSLstd450PackUnorm2x16:
7483 emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
7484 break;
7485 case GLSLstd450PackHalf2x16:
7486 emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
7487 break;
7488 case GLSLstd450UnpackSnorm4x8:
7489 emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
7490 break;
7491 case GLSLstd450UnpackUnorm4x8:
7492 emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
7493 break;
7494 case GLSLstd450UnpackSnorm2x16:
7495 emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
7496 break;
7497 case GLSLstd450UnpackUnorm2x16:
7498 emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
7499 break;
7500 case GLSLstd450UnpackHalf2x16:
7501 emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
7502 break;
7503
7504 case GLSLstd450PackDouble2x32:
7505 emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
7506 break;
7507 case GLSLstd450UnpackDouble2x32:
7508 emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
7509 break;
7510
7511 // Vector math
7512 case GLSLstd450Length:
7513 emit_unary_func_op(result_type, id, args[0], "length");
7514 break;
7515 case GLSLstd450Distance:
7516 emit_binary_func_op(result_type, id, args[0], args[1], "distance");
7517 break;
7518 case GLSLstd450Cross:
7519 emit_binary_func_op(result_type, id, args[0], args[1], "cross");
7520 break;
7521 case GLSLstd450Normalize:
7522 emit_unary_func_op(result_type, id, args[0], "normalize");
7523 break;
7524 case GLSLstd450FaceForward:
7525 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
7526 break;
7527 case GLSLstd450Reflect:
7528 emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
7529 break;
7530 case GLSLstd450Refract:
7531 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
7532 break;
7533
7534 // Bit-fiddling
7535 case GLSLstd450FindILsb:
7536 // findLSB always returns int.
7537 emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
7538 break;
7539
7540 case GLSLstd450FindSMsb:
7541 emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
7542 break;
7543
7544 case GLSLstd450FindUMsb:
7545 emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
7546 int_type); // findMSB always returns int.
7547 break;
7548
7549 // Multisampled varying
7550 case GLSLstd450InterpolateAtCentroid:
7551 emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
7552 break;
7553 case GLSLstd450InterpolateAtSample:
7554 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
7555 break;
7556 case GLSLstd450InterpolateAtOffset:
7557 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
7558 break;
7559
7560 case GLSLstd450NMin:
7561 case GLSLstd450NMax:
7562 {
7563 emit_nminmax_op(result_type, id, args[0], args[1], op);
7564 break;
7565 }
7566
7567 case GLSLstd450NClamp:
7568 {
7569 // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
7570 // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
7571 uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
7572 if (!max_id)
7573 max_id = ir.increase_bound_by(1);
7574
7575 // Inherit precision qualifiers.
7576 ir.meta[max_id] = ir.meta[id];
7577
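// NClamp(x, minVal, maxVal) is then emulated as NMin(NMax(x, minVal), maxVal).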
7578 emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
7579 emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
7580 break;
7581 }
7582
7583 default:
7584 statement("// unimplemented GLSL op ", eop);
7585 break;
7586 }
7587 }
7588
7589 void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
7590 {
7591 // Need to emulate this call.
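// NMin/NMax must return the non-NaN operand when exactly one input is NaN, which plain
// min()/max() does not guarantee, so the result is built roughly as:
//   mix(mix(min(a, b), b, isnan(a)), a, isnan(b))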
7592 uint32_t &ids = extra_sub_expressions[id];
7593 if (!ids)
7594 {
7595 ids = ir.increase_bound_by(5);
7596 auto btype = get<SPIRType>(result_type);
7597 btype.basetype = SPIRType::Boolean;
7598 set<SPIRType>(ids, btype);
7599 }
7600
7601 uint32_t btype_id = ids + 0;
7602 uint32_t left_nan_id = ids + 1;
7603 uint32_t right_nan_id = ids + 2;
7604 uint32_t tmp_id = ids + 3;
7605 uint32_t mixed_first_id = ids + 4;
7606
7607 // Inherit precision qualifiers.
7608 ir.meta[tmp_id] = ir.meta[id];
7609 ir.meta[mixed_first_id] = ir.meta[id];
7610
7611 emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
7612 emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
7613 emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
7614 emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
7615 emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
7616 }
7617
7618 void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7619 uint32_t)
7620 {
7621 require_extension_internal("GL_AMD_shader_ballot");
7622
7623 enum AMDShaderBallot
7624 {
7625 SwizzleInvocationsAMD = 1,
7626 SwizzleInvocationsMaskedAMD = 2,
7627 WriteInvocationAMD = 3,
7628 MbcntAMD = 4
7629 };
7630
7631 auto op = static_cast<AMDShaderBallot>(eop);
7632
7633 switch (op)
7634 {
7635 case SwizzleInvocationsAMD:
7636 emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
7637 register_control_dependent_expression(id);
7638 break;
7639
7640 case SwizzleInvocationsMaskedAMD:
7641 emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
7642 register_control_dependent_expression(id);
7643 break;
7644
7645 case WriteInvocationAMD:
7646 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
7647 register_control_dependent_expression(id);
7648 break;
7649
7650 case MbcntAMD:
7651 emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
7652 register_control_dependent_expression(id);
7653 break;
7654
7655 default:
7656 statement("// unimplemented SPV AMD shader ballot op ", eop);
7657 break;
7658 }
7659 }
7660
7661 void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
7662 const uint32_t *args, uint32_t)
7663 {
7664 require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
7665
7666 enum AMDShaderExplicitVertexParameter
7667 {
7668 InterpolateAtVertexAMD = 1
7669 };
7670
7671 auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
7672
7673 switch (op)
7674 {
7675 case InterpolateAtVertexAMD:
7676 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
7677 break;
7678
7679 default:
7680 statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
7681 break;
7682 }
7683 }
7684
7685 void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
7686 const uint32_t *args, uint32_t)
7687 {
7688 require_extension_internal("GL_AMD_shader_trinary_minmax");
7689
7690 enum AMDShaderTrinaryMinMax
7691 {
7692 FMin3AMD = 1,
7693 UMin3AMD = 2,
7694 SMin3AMD = 3,
7695 FMax3AMD = 4,
7696 UMax3AMD = 5,
7697 SMax3AMD = 6,
7698 FMid3AMD = 7,
7699 UMid3AMD = 8,
7700 SMid3AMD = 9
7701 };
7702
7703 auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
7704
7705 switch (op)
7706 {
7707 case FMin3AMD:
7708 case UMin3AMD:
7709 case SMin3AMD:
7710 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
7711 break;
7712
7713 case FMax3AMD:
7714 case UMax3AMD:
7715 case SMax3AMD:
7716 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
7717 break;
7718
7719 case FMid3AMD:
7720 case UMid3AMD:
7721 case SMid3AMD:
7722 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
7723 break;
7724
7725 default:
7726 statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
7727 break;
7728 }
7729 }
7730
7731 void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7732 uint32_t)
7733 {
7734 require_extension_internal("GL_AMD_gcn_shader");
7735
7736 enum AMDGCNShader
7737 {
7738 CubeFaceIndexAMD = 1,
7739 CubeFaceCoordAMD = 2,
7740 TimeAMD = 3
7741 };
7742
7743 auto op = static_cast<AMDGCNShader>(eop);
7744
7745 switch (op)
7746 {
7747 case CubeFaceIndexAMD:
7748 emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
7749 break;
7750 case CubeFaceCoordAMD:
7751 emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
7752 break;
7753 case TimeAMD:
7754 {
7755 string expr = "timeAMD()";
7756 emit_op(result_type, id, expr, true);
7757 register_control_dependent_expression(id);
7758 break;
7759 }
7760
7761 default:
7762 statement("// unimplemented SPV AMD gcn shader op ", eop);
7763 break;
7764 }
7765 }
7766
7767 void CompilerGLSL::emit_subgroup_op(const Instruction &i)
7768 {
7769 const uint32_t *ops = stream(i);
7770 auto op = static_cast<Op>(i.op);
7771
7772 if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
7773 SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
7774
7775 // If we need to do implicit bitcasts, make sure we do it with the correct type.
7776 uint32_t integer_width = get_integer_width_for_instruction(i);
7777 auto int_type = to_signed_basetype(integer_width);
7778 auto uint_type = to_unsigned_basetype(integer_width);
7779
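// First pass: make sure the required subgroup extension or emulation feature is requested.
// The actual GLSL for the op is emitted by the second switch further down.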
7780 switch (op)
7781 {
7782 case OpGroupNonUniformElect:
7783 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
7784 break;
7785
7786 case OpGroupNonUniformBallotBitCount:
7787 {
7788 const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
7789 if (operation == GroupOperationReduce)
7790 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
7791 else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
7792 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7793 }
7794 break;
7795
7796 case OpGroupNonUniformBallotBitExtract:
7797 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
7798 break;
7799
7800 case OpGroupNonUniformInverseBallot:
7801 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7802 break;
7803
7804 case OpGroupNonUniformBallot:
7805 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
7806 break;
7807
7808 case OpGroupNonUniformBallotFindLSB:
7809 case OpGroupNonUniformBallotFindMSB:
7810 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
7811 break;
7812
7813 case OpGroupNonUniformBroadcast:
7814 case OpGroupNonUniformBroadcastFirst:
7815 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
7816 break;
7817
7818 case OpGroupNonUniformShuffle:
7819 case OpGroupNonUniformShuffleXor:
7820 require_extension_internal("GL_KHR_shader_subgroup_shuffle");
7821 break;
7822
7823 case OpGroupNonUniformShuffleUp:
7824 case OpGroupNonUniformShuffleDown:
7825 require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
7826 break;
7827
7828 case OpGroupNonUniformAll:
7829 case OpGroupNonUniformAny:
7830 case OpGroupNonUniformAllEqual:
7831 {
7832 const SPIRType &type = expression_type(ops[3]);
7833 if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
7834 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
7835 else
7836 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
7837 }
7838 break;
7839
7840 case OpGroupNonUniformFAdd:
7841 case OpGroupNonUniformFMul:
7842 case OpGroupNonUniformFMin:
7843 case OpGroupNonUniformFMax:
7844 case OpGroupNonUniformIAdd:
7845 case OpGroupNonUniformIMul:
7846 case OpGroupNonUniformSMin:
7847 case OpGroupNonUniformSMax:
7848 case OpGroupNonUniformUMin:
7849 case OpGroupNonUniformUMax:
7850 case OpGroupNonUniformBitwiseAnd:
7851 case OpGroupNonUniformBitwiseOr:
7852 case OpGroupNonUniformBitwiseXor:
7853 case OpGroupNonUniformLogicalAnd:
7854 case OpGroupNonUniformLogicalOr:
7855 case OpGroupNonUniformLogicalXor:
7856 {
7857 auto operation = static_cast<GroupOperation>(ops[3]);
7858 if (operation == GroupOperationClusteredReduce)
7859 {
7860 require_extension_internal("GL_KHR_shader_subgroup_clustered");
7861 }
7862 else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
7863 operation == GroupOperationReduce)
7864 {
7865 require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
7866 }
7867 else
7868 SPIRV_CROSS_THROW("Invalid group operation.");
7869 break;
7870 }
7871
7872 case OpGroupNonUniformQuadSwap:
7873 case OpGroupNonUniformQuadBroadcast:
7874 require_extension_internal("GL_KHR_shader_subgroup_quad");
7875 break;
7876
7877 default:
7878 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
7879 }
7880
7881 uint32_t result_type = ops[0];
7882 uint32_t id = ops[1];
7883
7884 auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
7885 if (scope != ScopeSubgroup)
7886 SPIRV_CROSS_THROW("Only subgroup scope is supported.");
7887
7888 switch (op)
7889 {
7890 case OpGroupNonUniformElect:
7891 emit_op(result_type, id, "subgroupElect()", true);
7892 break;
7893
7894 case OpGroupNonUniformBroadcast:
7895 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
7896 break;
7897
7898 case OpGroupNonUniformBroadcastFirst:
7899 emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
7900 break;
7901
7902 case OpGroupNonUniformBallot:
7903 emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
7904 break;
7905
7906 case OpGroupNonUniformInverseBallot:
7907 emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
7908 break;
7909
7910 case OpGroupNonUniformBallotBitExtract:
7911 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
7912 break;
7913
7914 case OpGroupNonUniformBallotFindLSB:
7915 emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
7916 break;
7917
7918 case OpGroupNonUniformBallotFindMSB:
7919 emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
7920 break;
7921
7922 case OpGroupNonUniformBallotBitCount:
7923 {
7924 auto operation = static_cast<GroupOperation>(ops[3]);
7925 if (operation == GroupOperationReduce)
7926 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
7927 else if (operation == GroupOperationInclusiveScan)
7928 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
7929 else if (operation == GroupOperationExclusiveScan)
7930 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
7931 else
7932 SPIRV_CROSS_THROW("Invalid BitCount operation.");
7933 break;
7934 }
7935
7936 case OpGroupNonUniformShuffle:
7937 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
7938 break;
7939
7940 case OpGroupNonUniformShuffleXor:
7941 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
7942 break;
7943
7944 case OpGroupNonUniformShuffleUp:
7945 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
7946 break;
7947
7948 case OpGroupNonUniformShuffleDown:
7949 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
7950 break;
7951
7952 case OpGroupNonUniformAll:
7953 emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
7954 break;
7955
7956 case OpGroupNonUniformAny:
7957 emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
7958 break;
7959
7960 case OpGroupNonUniformAllEqual:
7961 emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
7962 break;
7963
7964 // clang-format off
7965 #define GLSL_GROUP_OP(op, glsl_op) \
7966 case OpGroupNonUniform##op: \
7967 { \
7968 auto operation = static_cast<GroupOperation>(ops[3]); \
7969 if (operation == GroupOperationReduce) \
7970 emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
7971 else if (operation == GroupOperationInclusiveScan) \
7972 emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
7973 else if (operation == GroupOperationExclusiveScan) \
7974 emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
7975 else if (operation == GroupOperationClusteredReduce) \
7976 emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
7977 else \
7978 SPIRV_CROSS_THROW("Invalid group operation."); \
7979 break; \
7980 }
7981
7982 #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
7983 case OpGroupNonUniform##op: \
7984 { \
7985 auto operation = static_cast<GroupOperation>(ops[3]); \
7986 if (operation == GroupOperationReduce) \
7987 emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
7988 else if (operation == GroupOperationInclusiveScan) \
7989 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
7990 else if (operation == GroupOperationExclusiveScan) \
7991 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
7992 else if (operation == GroupOperationClusteredReduce) \
7993 emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
7994 else \
7995 SPIRV_CROSS_THROW("Invalid group operation."); \
7996 break; \
7997 }
7998
7999 GLSL_GROUP_OP(FAdd, Add)
8000 GLSL_GROUP_OP(FMul, Mul)
8001 GLSL_GROUP_OP(FMin, Min)
8002 GLSL_GROUP_OP(FMax, Max)
8003 GLSL_GROUP_OP(IAdd, Add)
8004 GLSL_GROUP_OP(IMul, Mul)
8005 GLSL_GROUP_OP_CAST(SMin, Min, int_type)
8006 GLSL_GROUP_OP_CAST(SMax, Max, int_type)
8007 GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
8008 GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
8009 GLSL_GROUP_OP(BitwiseAnd, And)
8010 GLSL_GROUP_OP(BitwiseOr, Or)
8011 GLSL_GROUP_OP(BitwiseXor, Xor)
8012 GLSL_GROUP_OP(LogicalAnd, And)
8013 GLSL_GROUP_OP(LogicalOr, Or)
8014 GLSL_GROUP_OP(LogicalXor, Xor)
8015 #undef GLSL_GROUP_OP
8016 #undef GLSL_GROUP_OP_CAST
8017 // clang-format on
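// For reference, a sketch of what one instantiation covers: GLSL_GROUP_OP(FAdd, Add) handles
// OpGroupNonUniformFAdd and emits subgroupAdd(x), subgroupInclusiveAdd(x), subgroupExclusiveAdd(x) or
// subgroupClusteredAdd(x, ClusterSize), depending on the group operation in ops[3].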
8018
8019 case OpGroupNonUniformQuadSwap:
8020 {
8021 uint32_t direction = evaluate_constant_u32(ops[4]);
8022 if (direction == 0)
8023 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
8024 else if (direction == 1)
8025 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
8026 else if (direction == 2)
8027 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
8028 else
8029 SPIRV_CROSS_THROW("Invalid quad swap direction.");
8030 break;
8031 }
8032
8033 case OpGroupNonUniformQuadBroadcast:
8034 {
8035 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
8036 break;
8037 }
8038
8039 default:
8040 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
8041 }
8042
8043 register_control_dependent_expression(id);
8044 }
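// Illustrative end-to-end sketch (hypothetical IDs): a ballot popcount expressed as
// OpGroupNonUniformBallot followed by OpGroupNonUniformBallotBitCount with GroupOperationReduce
// lowers to
//   uvec4 _30 = subgroupBallot(cond);
//   uint _31 = subgroupBallotBitCount(_30);
// after the first switch has requested the matching subgroup feature (or its non-Vulkan fallback).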
8045
8046 string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
8047 {
8048 // OpBitcast can deal with pointers.
8049 if (out_type.pointer || in_type.pointer)
8050 return type_to_glsl(out_type);
8051
8052 if (out_type.basetype == in_type.basetype)
8053 return "";
8054
8055 assert(out_type.basetype != SPIRType::Boolean);
8056 assert(in_type.basetype != SPIRType::Boolean);
8057
8058 bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
8059 bool same_size_cast = out_type.width == in_type.width;
8060
8061 // Trivial bitcast case, casts between integers.
8062 if (integral_cast && same_size_cast)
8063 return type_to_glsl(out_type);
8064
8065 // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
8066 if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
8067 return "unpack8";
8068 else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
8069 return "pack16";
8070 else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
8071 return "pack32";
8072
8073 // Floating <-> Integer special casts. Just have to enumerate all cases. :(
8074 // 16-bit, 32-bit and 64-bit floats.
8075 if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
8076 {
8077 if (is_legacy_es())
8078 SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
8079 else if (!options.es && options.version < 330)
8080 require_extension_internal("GL_ARB_shader_bit_encoding");
8081 return "floatBitsToUint";
8082 }
8083 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
8084 {
8085 if (is_legacy_es())
8086 SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
8087 else if (!options.es && options.version < 330)
8088 require_extension_internal("GL_ARB_shader_bit_encoding");
8089 return "floatBitsToInt";
8090 }
8091 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
8092 {
8093 if (is_legacy_es())
8094 SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
8095 else if (!options.es && options.version < 330)
8096 require_extension_internal("GL_ARB_shader_bit_encoding");
8097 return "uintBitsToFloat";
8098 }
8099 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
8100 {
8101 if (is_legacy_es())
8102 SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
8103 else if (!options.es && options.version < 330)
8104 require_extension_internal("GL_ARB_shader_bit_encoding");
8105 return "intBitsToFloat";
8106 }
8107
8108 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
8109 return "doubleBitsToInt64";
8110 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
8111 return "doubleBitsToUint64";
8112 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
8113 return "int64BitsToDouble";
8114 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
8115 return "uint64BitsToDouble";
8116 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
8117 return "float16BitsToInt16";
8118 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
8119 return "float16BitsToUint16";
8120 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
8121 return "int16BitsToFloat16";
8122 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
8123 return "uint16BitsToFloat16";
8124
8125 // And finally, some even more special purpose casts.
8126 if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
8127 return "packUint2x32";
8128 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
8129 return "unpackUint2x32";
8130 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
8131 return "unpackFloat2x16";
8132 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
8133 return "packFloat2x16";
8134 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
8135 return "packInt2x16";
8136 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
8137 return "unpackInt2x16";
8138 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
8139 return "packUint2x16";
8140 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
8141 return "unpackUint2x16";
8142 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
8143 return "packInt4x16";
8144 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
8145 return "unpackInt4x16";
8146 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
8147 return "packUint4x16";
8148 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
8149 return "unpackUint4x16";
8150
8151 return "";
8152 }
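// Hedged examples of the mapping above (assuming the required extensions are available):
//   float  -> uint      returns "floatBitsToUint"
//   uvec2  -> uint64_t  returns "packUint2x32"
//   uint   -> f16vec2   returns "unpackFloat2x16"
// An empty return means no wrapping function is emitted by bitcast_glsl() below.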
8153
8154 string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
8155 {
8156 auto op = bitcast_glsl_op(result_type, expression_type(argument));
8157 if (op.empty())
8158 return to_enclosed_unpacked_expression(argument);
8159 else
8160 return join(op, "(", to_unpacked_expression(argument), ")");
8161 }
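// E.g. (hypothetical expression name): bitcasting a float expression "v" to uint yields
// "floatBitsToUint(v)", while an empty op string simply returns the enclosed expression as-is.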
8162
8163 std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
8164 {
8165 auto expr = to_expression(arg);
8166 auto &src_type = expression_type(arg);
8167 if (src_type.basetype != target_type)
8168 {
8169 auto target = src_type;
8170 target.basetype = target_type;
8171 expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
8172 }
8173
8174 return expr;
8175 }
8176
8177 std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
8178 const std::string &expr)
8179 {
8180 if (target_type.basetype == expr_type)
8181 return expr;
8182
8183 auto src_type = target_type;
8184 src_type.basetype = expr_type;
8185 return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
8186 }
8187
8188 string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
8189 {
8190 switch (builtin)
8191 {
8192 case BuiltInPosition:
8193 return "gl_Position";
8194 case BuiltInPointSize:
8195 return "gl_PointSize";
8196 case BuiltInClipDistance:
8197 return "gl_ClipDistance";
8198 case BuiltInCullDistance:
8199 return "gl_CullDistance";
8200 case BuiltInVertexId:
8201 if (options.vulkan_semantics)
8202 SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
8203 "with GL semantics.");
8204 return "gl_VertexID";
8205 case BuiltInInstanceId:
8206 if (options.vulkan_semantics)
8207 {
8208 auto model = get_entry_point().model;
8209 switch (model)
8210 {
8211 case spv::ExecutionModelIntersectionKHR:
8212 case spv::ExecutionModelAnyHitKHR:
8213 case spv::ExecutionModelClosestHitKHR:
8214 // gl_InstanceID is allowed in these shaders.
8215 break;
8216
8217 default:
8218 SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
8219 "created with GL semantics.");
8220 }
8221 }
8222 if (!options.es && options.version < 140)
8223 {
8224 require_extension_internal("GL_ARB_draw_instanced");
8225 }
8226 return "gl_InstanceID";
8227 case BuiltInVertexIndex:
8228 if (options.vulkan_semantics)
8229 return "gl_VertexIndex";
8230 else
8231 return "gl_VertexID"; // gl_VertexID already has the base offset applied.
8232 case BuiltInInstanceIndex:
8233 if (options.vulkan_semantics)
8234 return "gl_InstanceIndex";
8235
8236 if (!options.es && options.version < 140)
8237 {
8238 require_extension_internal("GL_ARB_draw_instanced");
8239 }
8240
8241 if (options.vertex.support_nonzero_base_instance)
8242 {
8243 if (!options.vulkan_semantics)
8244 {
8245 // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
8246 require_extension_internal("GL_ARB_shader_draw_parameters");
8247 }
8248 return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
8249 }
8250 else
8251 return "gl_InstanceID";
8252 case BuiltInPrimitiveId:
8253 if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
8254 return "gl_PrimitiveIDIn";
8255 else
8256 return "gl_PrimitiveID";
8257 case BuiltInInvocationId:
8258 return "gl_InvocationID";
8259 case BuiltInLayer:
8260 return "gl_Layer";
8261 case BuiltInViewportIndex:
8262 return "gl_ViewportIndex";
8263 case BuiltInTessLevelOuter:
8264 return "gl_TessLevelOuter";
8265 case BuiltInTessLevelInner:
8266 return "gl_TessLevelInner";
8267 case BuiltInTessCoord:
8268 return "gl_TessCoord";
8269 case BuiltInFragCoord:
8270 return "gl_FragCoord";
8271 case BuiltInPointCoord:
8272 return "gl_PointCoord";
8273 case BuiltInFrontFacing:
8274 return "gl_FrontFacing";
8275 case BuiltInFragDepth:
8276 return "gl_FragDepth";
8277 case BuiltInNumWorkgroups:
8278 return "gl_NumWorkGroups";
8279 case BuiltInWorkgroupSize:
8280 return "gl_WorkGroupSize";
8281 case BuiltInWorkgroupId:
8282 return "gl_WorkGroupID";
8283 case BuiltInLocalInvocationId:
8284 return "gl_LocalInvocationID";
8285 case BuiltInGlobalInvocationId:
8286 return "gl_GlobalInvocationID";
8287 case BuiltInLocalInvocationIndex:
8288 return "gl_LocalInvocationIndex";
8289 case BuiltInHelperInvocation:
8290 return "gl_HelperInvocation";
8291
8292 case BuiltInBaseVertex:
8293 if (options.es)
8294 SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
8295
8296 if (options.vulkan_semantics)
8297 {
8298 if (options.version < 460)
8299 {
8300 require_extension_internal("GL_ARB_shader_draw_parameters");
8301 return "gl_BaseVertexARB";
8302 }
8303 return "gl_BaseVertex";
8304 }
8305 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8306 require_extension_internal("GL_ARB_shader_draw_parameters");
8307 return "SPIRV_Cross_BaseVertex";
8308
8309 case BuiltInBaseInstance:
8310 if (options.es)
8311 SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
8312
8313 if (options.vulkan_semantics)
8314 {
8315 if (options.version < 460)
8316 {
8317 require_extension_internal("GL_ARB_shader_draw_parameters");
8318 return "gl_BaseInstanceARB";
8319 }
8320 return "gl_BaseInstance";
8321 }
8322 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8323 require_extension_internal("GL_ARB_shader_draw_parameters");
8324 return "SPIRV_Cross_BaseInstance";
8325
8326 case BuiltInDrawIndex:
8327 if (options.es)
8328 SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
8329
8330 if (options.vulkan_semantics)
8331 {
8332 if (options.version < 460)
8333 {
8334 require_extension_internal("GL_ARB_shader_draw_parameters");
8335 return "gl_DrawIDARB";
8336 }
8337 return "gl_DrawID";
8338 }
8339 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8340 require_extension_internal("GL_ARB_shader_draw_parameters");
8341 return "gl_DrawIDARB";
8342
8343 case BuiltInSampleId:
8344 if (options.es && options.version < 320)
8345 require_extension_internal("GL_OES_sample_variables");
8346 if (!options.es && options.version < 400)
8347 SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
8348 return "gl_SampleID";
8349
8350 case BuiltInSampleMask:
8351 if (options.es && options.version < 320)
8352 require_extension_internal("GL_OES_sample_variables");
8353 if (!options.es && options.version < 400)
8354 SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
8355
8356 if (storage == StorageClassInput)
8357 return "gl_SampleMaskIn";
8358 else
8359 return "gl_SampleMask";
8360
8361 case BuiltInSamplePosition:
8362 if (options.es && options.version < 320)
8363 require_extension_internal("GL_OES_sample_variables");
8364 if (!options.es && options.version < 400)
8365 SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
8366 return "gl_SamplePosition";
8367
8368 case BuiltInViewIndex:
8369 if (options.vulkan_semantics)
8370 return "gl_ViewIndex";
8371 else
8372 return "gl_ViewID_OVR";
8373
8374 case BuiltInNumSubgroups:
8375 request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
8376 return "gl_NumSubgroups";
8377
8378 case BuiltInSubgroupId:
8379 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
8380 return "gl_SubgroupID";
8381
8382 case BuiltInSubgroupSize:
8383 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
8384 return "gl_SubgroupSize";
8385
8386 case BuiltInSubgroupLocalInvocationId:
8387 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
8388 return "gl_SubgroupInvocationID";
8389
8390 case BuiltInSubgroupEqMask:
8391 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8392 return "gl_SubgroupEqMask";
8393
8394 case BuiltInSubgroupGeMask:
8395 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8396 return "gl_SubgroupGeMask";
8397
8398 case BuiltInSubgroupGtMask:
8399 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8400 return "gl_SubgroupGtMask";
8401
8402 case BuiltInSubgroupLeMask:
8403 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8404 return "gl_SubgroupLeMask";
8405
8406 case BuiltInSubgroupLtMask:
8407 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8408 return "gl_SubgroupLtMask";
8409
8410 case BuiltInLaunchIdKHR:
8411 return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
8412 case BuiltInLaunchSizeKHR:
8413 return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
8414 case BuiltInWorldRayOriginKHR:
8415 return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
8416 case BuiltInWorldRayDirectionKHR:
8417 return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
8418 case BuiltInObjectRayOriginKHR:
8419 return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
8420 case BuiltInObjectRayDirectionKHR:
8421 return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
8422 case BuiltInRayTminKHR:
8423 return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
8424 case BuiltInRayTmaxKHR:
8425 return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
8426 case BuiltInInstanceCustomIndexKHR:
8427 return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
8428 case BuiltInObjectToWorldKHR:
8429 return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
8430 case BuiltInWorldToObjectKHR:
8431 return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
8432 case BuiltInHitTNV:
8433 // gl_HitTEXT is an alias of RayTMax in KHR.
8434 return "gl_HitTNV";
8435 case BuiltInHitKindKHR:
8436 return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
8437 case BuiltInIncomingRayFlagsKHR:
8438 return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
8439
8440 case BuiltInBaryCoordNV:
8441 {
8442 if (options.es && options.version < 320)
8443 SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
8444 else if (!options.es && options.version < 450)
8445 SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
8446 require_extension_internal("GL_NV_fragment_shader_barycentric");
8447 return "gl_BaryCoordNV";
8448 }
8449
8450 case BuiltInBaryCoordNoPerspNV:
8451 {
8452 if (options.es && options.version < 320)
8453 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
8454 else if (!options.es && options.version < 450)
8455 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
8456 require_extension_internal("GL_NV_fragment_shader_barycentric");
8457 return "gl_BaryCoordNoPerspNV";
8458 }
8459
8460 case BuiltInFragStencilRefEXT:
8461 {
8462 if (!options.es)
8463 {
8464 require_extension_internal("GL_ARB_shader_stencil_export");
8465 return "gl_FragStencilRefARB";
8466 }
8467 else
8468 SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
8469 }
8470
8471 case BuiltInPrimitiveShadingRateKHR:
8472 {
8473 if (!options.vulkan_semantics)
8474 SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
8475 require_extension_internal("GL_EXT_fragment_shading_rate");
8476 return "gl_PrimitiveShadingRateEXT";
8477 }
8478
8479 case BuiltInShadingRateKHR:
8480 {
8481 if (!options.vulkan_semantics)
8482 SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
8483 require_extension_internal("GL_EXT_fragment_shading_rate");
8484 return "gl_ShadingRateEXT";
8485 }
8486
8487 case BuiltInDeviceIndex:
8488 if (!options.vulkan_semantics)
8489 SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
8490 require_extension_internal("GL_EXT_device_group");
8491 return "gl_DeviceIndex";
8492
8493 case BuiltInFullyCoveredEXT:
8494 if (!options.es)
8495 require_extension_internal("GL_NV_conservative_raster_underestimation");
8496 else
8497 SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
8498 return "gl_FragFullyCoveredNV";
8499
8500 default:
8501 return join("gl_BuiltIn_", convert_to_string(builtin));
8502 }
8503 }
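// Representative mappings (sketch): BuiltInVertexIndex -> "gl_VertexIndex" with Vulkan semantics but
// "gl_VertexID" on plain GL; BuiltInInstanceIndex -> "(gl_InstanceID + SPIRV_Cross_BaseInstance)" when a
// non-zero base instance must be honored; unknown builtins fall back to "gl_BuiltIn_<N>".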
8504
8505 const char *CompilerGLSL::index_to_swizzle(uint32_t index)
8506 {
8507 switch (index)
8508 {
8509 case 0:
8510 return "x";
8511 case 1:
8512 return "y";
8513 case 2:
8514 return "z";
8515 case 3:
8516 return "w";
8517 default:
8518 SPIRV_CROSS_THROW("Swizzle index out of range");
8519 }
8520 }
8521
8522 void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
8523 AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
8524 uint32_t index)
8525 {
8526 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8527 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8528
8529 expr += "[";
8530
8531 if (index_is_literal)
8532 expr += convert_to_string(index);
8533 else
8534 expr += to_expression(index, register_expression_read);
8535
8536 expr += "]";
8537 }
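// E.g. a literal index 3 appends "[3]", while a dynamic index appends "[expr]" using that ID's
// expression (illustrative only, names hypothetical).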
8538
8539 bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
8540 {
8541 return true;
8542 }
8543
8544 string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
8545 AccessChainFlags flags, AccessChainMeta *meta)
8546 {
8547 string expr;
8548
8549 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8550 bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
8551 bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
8552 bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
8553 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8554 bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
8555
8556 if (!chain_only)
8557 {
8558 // We handle transpose explicitly, so don't resolve that here.
8559 auto *e = maybe_get<SPIRExpression>(base);
8560 bool old_transpose = e && e->need_transpose;
8561 if (e)
8562 e->need_transpose = false;
8563 expr = to_enclosed_expression(base, register_expression_read);
8564 if (e)
8565 e->need_transpose = old_transpose;
8566 }
8567
8568 // Start traversing type hierarchy at the proper non-pointer types,
8569 // but keep type_id referencing the original pointer for use below.
8570 uint32_t type_id = expression_type_id(base);
8571
8572 if (!backend.native_pointers)
8573 {
8574 if (ptr_chain)
8575 SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
8576
8577 // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
8578 // continuing the access chain.
8579 if (should_dereference(base))
8580 {
8581 auto &type = get<SPIRType>(type_id);
8582 expr = dereference_expression(type, expr);
8583 }
8584 }
8585
8586 const auto *type = &get_pointee_type(type_id);
8587
8588 bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
8589 bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
8590 bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
8591 uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
8592 bool is_invariant = has_decoration(base, DecorationInvariant);
8593 bool pending_array_enclose = false;
8594 bool dimension_flatten = false;
8595
8596 const auto append_index = [&](uint32_t index, bool is_literal) {
8597 AccessChainFlags mod_flags = flags;
8598 if (!is_literal)
8599 mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
8600 access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
8601 };
8602
8603 for (uint32_t i = 0; i < count; i++)
8604 {
8605 uint32_t index = indices[i];
8606
8607 bool is_literal = index_is_literal;
8608 if (is_literal && msb_is_id && (index >> 31u) != 0u)
8609 {
8610 is_literal = false;
8611 index &= 0x7fffffffu;
8612 }
8613
8614 // Pointer chains
8615 if (ptr_chain && i == 0)
8616 {
8617 // If we are flattening multidimensional arrays, only create opening bracket on first
8618 // array index.
8619 if (options.flatten_multidimensional_arrays)
8620 {
8621 dimension_flatten = type->array.size() >= 1;
8622 pending_array_enclose = dimension_flatten;
8623 if (pending_array_enclose)
8624 expr += "[";
8625 }
8626
8627 if (options.flatten_multidimensional_arrays && dimension_flatten)
8628 {
8629 // If we are flattening multidimensional arrays, do manual stride computation.
8630 if (is_literal)
8631 expr += convert_to_string(index);
8632 else
8633 expr += to_enclosed_expression(index, register_expression_read);
8634
8635 for (auto j = uint32_t(type->array.size()); j; j--)
8636 {
8637 expr += " * ";
8638 expr += enclose_expression(to_array_size(*type, j - 1));
8639 }
8640
8641 if (type->array.empty())
8642 pending_array_enclose = false;
8643 else
8644 expr += " + ";
8645
8646 if (!pending_array_enclose)
8647 expr += "]";
8648 }
8649 else
8650 {
8651 append_index(index, is_literal);
8652 }
8653
8654 if (type->basetype == SPIRType::ControlPointArray)
8655 {
8656 type_id = type->parent_type;
8657 type = &get<SPIRType>(type_id);
8658 }
8659
8660 access_chain_is_arrayed = true;
8661 }
8662 // Arrays
8663 else if (!type->array.empty())
8664 {
8665 // If we are flattening multidimensional arrays, only create opening bracket on first
8666 // array index.
8667 if (options.flatten_multidimensional_arrays && !pending_array_enclose)
8668 {
8669 dimension_flatten = type->array.size() > 1;
8670 pending_array_enclose = dimension_flatten;
8671 if (pending_array_enclose)
8672 expr += "[";
8673 }
8674
8675 assert(type->parent_type);
8676
8677 auto *var = maybe_get<SPIRVariable>(base);
8678 if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
8679 !has_decoration(type->self, DecorationBlock))
8680 {
8681 // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
8682 // Normally, these variables live in blocks when compiled from GLSL,
8683 // but HLSL seems to just emit straight arrays here.
8684 // We must pretend this access goes through gl_in/gl_out arrays
8685 // to be able to access certain builtins as arrays.
8686 auto builtin = ir.meta[base].decoration.builtin_type;
8687 switch (builtin)
8688 {
8689 // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
8690 // case BuiltInClipDistance:
8691 case BuiltInPosition:
8692 case BuiltInPointSize:
8693 if (var->storage == StorageClassInput)
8694 expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
8695 else if (var->storage == StorageClassOutput)
8696 expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
8697 else
8698 append_index(index, is_literal);
8699 break;
8700
8701 default:
8702 append_index(index, is_literal);
8703 break;
8704 }
8705 }
8706 else if (options.flatten_multidimensional_arrays && dimension_flatten)
8707 {
8708 // If we are flattening multidimensional arrays, do manual stride computation.
8709 auto &parent_type = get<SPIRType>(type->parent_type);
8710
8711 if (is_literal)
8712 expr += convert_to_string(index);
8713 else
8714 expr += to_enclosed_expression(index, register_expression_read);
8715
8716 for (auto j = uint32_t(parent_type.array.size()); j; j--)
8717 {
8718 expr += " * ";
8719 expr += enclose_expression(to_array_size(parent_type, j - 1));
8720 }
8721
8722 if (parent_type.array.empty())
8723 pending_array_enclose = false;
8724 else
8725 expr += " + ";
8726
8727 if (!pending_array_enclose)
8728 expr += "]";
8729 }
8730 // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
8731 // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
8732 else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
8733 {
8734 append_index(index, is_literal);
8735 }
8736
8737 type_id = type->parent_type;
8738 type = &get<SPIRType>(type_id);
8739
8740 access_chain_is_arrayed = true;
8741 }
8742 // For structs, the index refers to a constant, which indexes into the members.
8743 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
8744 else if (type->basetype == SPIRType::Struct)
8745 {
8746 if (!is_literal)
8747 index = evaluate_constant_u32(index);
8748
8749 if (index >= type->member_types.size())
8750 SPIRV_CROSS_THROW("Member index is out of bounds!");
8751
8752 BuiltIn builtin;
8753 if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base))
8754 {
8755 if (access_chain_is_arrayed)
8756 {
8757 expr += ".";
8758 expr += builtin_to_glsl(builtin, type->storage);
8759 }
8760 else
8761 expr = builtin_to_glsl(builtin, type->storage);
8762 }
8763 else
8764 {
8765 // If the member has a qualified name, use it as the entire chain
8766 string qual_mbr_name = get_member_qualified_name(type_id, index);
8767 if (!qual_mbr_name.empty())
8768 expr = qual_mbr_name;
8769 else if (flatten_member_reference)
8770 expr += join("_", to_member_name(*type, index));
8771 else
8772 expr += to_member_reference(base, *type, index, ptr_chain);
8773 }
8774
8775 if (has_member_decoration(type->self, index, DecorationInvariant))
8776 is_invariant = true;
8777
8778 is_packed = member_is_packed_physical_type(*type, index);
8779 if (member_is_remapped_physical_type(*type, index))
8780 physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
8781 else
8782 physical_type = 0;
8783
8784 row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
8785 type = &get<SPIRType>(type->member_types[index]);
8786 }
8787 // Matrix -> Vector
8788 else if (type->columns > 1)
8789 {
8790 // If we have a row-major matrix here, we need to defer any transpose in case this access chain
8791 // is used to store a column. We can resolve it right here and now if we access a scalar directly,
8792 // by flipping indexing order of the matrix.
8793
8794 expr += "[";
8795 if (is_literal)
8796 expr += convert_to_string(index);
8797 else
8798 expr += to_expression(index, register_expression_read);
8799 expr += "]";
8800
8801 type_id = type->parent_type;
8802 type = &get<SPIRType>(type_id);
8803 }
8804 // Vector -> Scalar
8805 else if (type->vecsize > 1)
8806 {
8807 string deferred_index;
8808 if (row_major_matrix_needs_conversion)
8809 {
8810 // Flip indexing order.
8811 auto column_index = expr.find_last_of('[');
8812 if (column_index != string::npos)
8813 {
8814 deferred_index = expr.substr(column_index);
8815 expr.resize(column_index);
8816 }
8817 }
8818
8819 // Internally, access chain implementation can also be used on composites,
8820 // ignore scalar access workarounds in this case.
8821 StorageClass effective_storage = StorageClassGeneric;
8822 bool ignore_potential_sliced_writes = false;
8823 if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
8824 {
8825 if (expression_type(base).pointer)
8826 effective_storage = get_expression_effective_storage_class(base);
8827
8828 // Special consideration for control points.
8829 // Control points can only be written by InvocationID, so there is no need
8830 // to consider scalar access chains here.
8831 // Cleans up some cases where it's very painful to determine the accurate storage class
8832 // since blocks can be partially masked ...
8833 auto *var = maybe_get_backing_variable(base);
8834 if (var && var->storage == StorageClassOutput &&
8835 get_execution_model() == ExecutionModelTessellationControl &&
8836 !has_decoration(var->self, DecorationPatch))
8837 {
8838 ignore_potential_sliced_writes = true;
8839 }
8840 }
8841 else
8842 ignore_potential_sliced_writes = true;
8843
8844 if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
8845 {
8846 // On some backends, we might not be able to safely access individual scalars in a vector.
8847 // To work around this, we might have to cast the access chain reference to something which can,
8848 // like a pointer to scalar, which we can then index into.
8849 prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
8850 is_packed);
8851 }
8852
8853 if (is_literal && !is_packed && !row_major_matrix_needs_conversion)
8854 {
8855 expr += ".";
8856 expr += index_to_swizzle(index);
8857 }
8858 else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
8859 {
8860 auto &c = get<SPIRConstant>(index);
8861 if (c.specialization)
8862 {
8863 // If the index is a spec constant, we cannot turn extract into a swizzle.
8864 expr += join("[", to_expression(index), "]");
8865 }
8866 else
8867 {
8868 expr += ".";
8869 expr += index_to_swizzle(c.scalar());
8870 }
8871 }
8872 else if (is_literal)
8873 {
8874 // For packed vectors, we can only access them as an array, not by swizzle.
8875 expr += join("[", index, "]");
8876 }
8877 else
8878 {
8879 expr += "[";
8880 expr += to_expression(index, register_expression_read);
8881 expr += "]";
8882 }
8883
8884 if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
8885 {
8886 prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
8887 is_packed);
8888 }
8889
8890 expr += deferred_index;
8891 row_major_matrix_needs_conversion = false;
8892
8893 is_packed = false;
8894 physical_type = 0;
8895 type_id = type->parent_type;
8896 type = &get<SPIRType>(type_id);
8897 }
8898 else if (!backend.allow_truncated_access_chain)
8899 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
8900 }
8901
8902 if (pending_array_enclose)
8903 {
8904 SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
8905 "but the access chain was terminated in the middle of a multidimensional array. "
8906 "This is not supported.");
8907 }
8908
8909 if (meta)
8910 {
8911 meta->need_transpose = row_major_matrix_needs_conversion;
8912 meta->storage_is_packed = is_packed;
8913 meta->storage_is_invariant = is_invariant;
8914 meta->storage_physical_type = physical_type;
8915 }
8916
8917 return expr;
8918 }
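// Illustrative sketch (hypothetical names): chaining base "ubo" -> struct member "lights" -> array index i
// -> component 2 builds up roughly "ubo.lights[i].z", while *meta reports whether the result is packed,
// invariant, or still needs a row-major transpose.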
8919
8920 void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
8921 {
8922 }
8923
8924 string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
8925 {
8926 auto ret = join(basename, "_", to_member_name(type, index));
8927 ParsedIR::sanitize_underscores(ret);
8928 return ret;
8929 }
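// E.g. (hypothetical names): basename "UBO" with member "light_pos" flattens to "UBO_light_pos"
// after underscore sanitation.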
8930
8931 string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
8932 AccessChainMeta *meta, bool ptr_chain)
8933 {
8934 if (flattened_buffer_blocks.count(base))
8935 {
8936 uint32_t matrix_stride = 0;
8937 uint32_t array_stride = 0;
8938 bool need_transpose = false;
8939 flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
8940 &array_stride, ptr_chain);
8941
8942 if (meta)
8943 {
8944 meta->need_transpose = target_type.columns > 1 && need_transpose;
8945 meta->storage_is_packed = false;
8946 }
8947
8948 return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
8949 need_transpose);
8950 }
8951 else if (flattened_structs.count(base) && count > 0)
8952 {
8953 AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
8954 if (ptr_chain)
8955 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
8956
8957 if (flattened_structs[base])
8958 {
8959 flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
8960 if (meta)
8961 meta->flattened_struct = target_type.basetype == SPIRType::Struct;
8962 }
8963
8964 auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
8965 if (meta)
8966 {
8967 meta->need_transpose = false;
8968 meta->storage_is_packed = false;
8969 }
8970
8971 auto basename = to_flattened_access_chain_expression(base);
8972 auto ret = join(basename, "_", chain);
8973 ParsedIR::sanitize_underscores(ret);
8974 return ret;
8975 }
8976 else
8977 {
8978 AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
8979 if (ptr_chain)
8980 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
8981 return access_chain_internal(base, indices, count, flags, meta);
8982 }
8983 }
8984
8985 string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
8986 {
8987 auto expr = type_to_glsl_constructor(type);
8988 expr += '(';
8989
8990 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
8991 {
8992 if (i)
8993 expr += ", ";
8994
8995 auto &member_type = get<SPIRType>(type.member_types[i]);
8996 if (member_type.basetype == SPIRType::Struct)
8997 expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
8998 else
8999 expr += to_flattened_struct_member(basename, type, i);
9000 }
9001 expr += ')';
9002 return expr;
9003 }
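// Illustrative sketch (hypothetical struct): with basename "UBO_light" and a struct
// Light { vec3 pos; float radius; }, this builds "Light(UBO_light_pos, UBO_light_radius)",
// recursing the same way into nested struct members.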
9004
9005 std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
9006 {
9007 // Do not use to_expression as that will unflatten access chains.
9008 string basename;
9009 if (const auto *var = maybe_get<SPIRVariable>(id))
9010 basename = to_name(var->self);
9011 else if (const auto *expr = maybe_get<SPIRExpression>(id))
9012 basename = expr->expression;
9013 else
9014 basename = to_expression(id);
9015
9016 return basename;
9017 }
9018
9019 void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
9020 const SmallVector<uint32_t> &indices)
9021 {
9022 SmallVector<uint32_t> sub_indices = indices;
9023 sub_indices.push_back(0);
9024
9025 auto *member_type = &type;
9026 for (auto &index : indices)
9027 member_type = &get<SPIRType>(member_type->member_types[index]);
9028
9029 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
9030 {
9031 sub_indices.back() = i;
9032 auto lhs = join(basename, "_", to_member_name(*member_type, i));
9033 ParsedIR::sanitize_underscores(lhs);
9034
9035 if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
9036 {
9037 store_flattened_struct(lhs, rhs_id, type, sub_indices);
9038 }
9039 else
9040 {
9041 auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
9042 statement(lhs, " = ", rhs, ";");
9043 }
9044 }
9045 }
9046
9047 void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
9048 {
9049 auto &type = expression_type(lhs_id);
9050 auto basename = to_flattened_access_chain_expression(lhs_id);
9051 store_flattened_struct(basename, value, type, {});
9052 }
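// Illustrative sketch (hypothetical names): storing a value _45 of a struct type with members pos and
// radius into a flattened variable "UBO_light" emits
//   UBO_light_pos = _45.pos;
//   UBO_light_radius = _45.radius;
// one statement per non-struct leaf member.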
9053
9054 std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
9055 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
9056 uint32_t /* array_stride */, bool need_transpose)
9057 {
9058 if (!target_type.array.empty())
9059 SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
9060 else if (target_type.basetype == SPIRType::Struct)
9061 return flattened_access_chain_struct(base, indices, count, target_type, offset);
9062 else if (target_type.columns > 1)
9063 return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
9064 else
9065 return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
9066 }
9067
9068 std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
9069 const SPIRType &target_type, uint32_t offset)
9070 {
9071 std::string expr;
9072
9073 expr += type_to_glsl_constructor(target_type);
9074 expr += "(";
9075
9076 for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
9077 {
9078 if (i != 0)
9079 expr += ", ";
9080
9081 const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
9082 uint32_t member_offset = type_struct_member_offset(target_type, i);
9083
9084 // The access chain terminates at the struct, so we need to find matrix strides and row-major information
9085 // ahead of time.
9086 bool need_transpose = false;
9087 uint32_t matrix_stride = 0;
9088 if (member_type.columns > 1)
9089 {
9090 need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
9091 matrix_stride = type_struct_member_matrix_stride(target_type, i);
9092 }
9093
9094 auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
9095 0 /* array_stride */, need_transpose);
9096
9097 // Cannot forward transpositions, so resolve them here.
9098 if (need_transpose)
9099 expr += convert_row_major_matrix(tmp, member_type, 0, false);
9100 else
9101 expr += tmp;
9102 }
9103
9104 expr += ")";
9105
9106 return expr;
9107 }
9108
9109 std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
9110 const SPIRType &target_type, uint32_t offset,
9111 uint32_t matrix_stride, bool need_transpose)
9112 {
9113 assert(matrix_stride);
9114 SPIRType tmp_type = target_type;
9115 if (need_transpose)
9116 swap(tmp_type.vecsize, tmp_type.columns);
9117
9118 std::string expr;
9119
9120 expr += type_to_glsl_constructor(tmp_type);
9121 expr += "(";
9122
9123 for (uint32_t i = 0; i < tmp_type.columns; i++)
9124 {
9125 if (i != 0)
9126 expr += ", ";
9127
9128 expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
9129 /* need_transpose= */ false);
9130 }
9131
9132 expr += ")";
9133
9134 return expr;
9135 }
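// Sketch (assuming a 16-byte matrix stride and hypothetical names): a column-major mat2 at offset 0 in a
// flattened block reads back as "mat2(UBO[0].xy, UBO[1].xy)", one flattened vector load per column.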
9136
9137 std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
9138 const SPIRType &target_type, uint32_t offset,
9139 uint32_t matrix_stride, bool need_transpose)
9140 {
9141 auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
9142
9143 auto buffer_name = to_name(expression_type(base).self);
9144
9145 if (need_transpose)
9146 {
9147 std::string expr;
9148
9149 if (target_type.vecsize > 1)
9150 {
9151 expr += type_to_glsl_constructor(target_type);
9152 expr += "(";
9153 }
9154
9155 for (uint32_t i = 0; i < target_type.vecsize; ++i)
9156 {
9157 if (i != 0)
9158 expr += ", ";
9159
9160 uint32_t component_offset = result.second + i * matrix_stride;
9161
9162 assert(component_offset % (target_type.width / 8) == 0);
9163 uint32_t index = component_offset / (target_type.width / 8);
9164
9165 expr += buffer_name;
9166 expr += "[";
9167 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
9168 expr += convert_to_string(index / 4);
9169 expr += "]";
9170
9171 expr += vector_swizzle(1, index % 4);
9172 }
9173
9174 if (target_type.vecsize > 1)
9175 {
9176 expr += ")";
9177 }
9178
9179 return expr;
9180 }
9181 else
9182 {
9183 assert(result.second % (target_type.width / 8) == 0);
9184 uint32_t index = result.second / (target_type.width / 8);
9185
9186 std::string expr;
9187
9188 expr += buffer_name;
9189 expr += "[";
9190 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
9191 expr += convert_to_string(index / 4);
9192 expr += "]";
9193
9194 expr += vector_swizzle(target_type.vecsize, index % 4);
9195
9196 return expr;
9197 }
9198 }
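// Sketch (hypothetical names): a vec3 at constant byte offset 32 becomes "UBO[2].xyz"; when need_transpose
// is set, each component is fetched separately (one matrix stride apart) and reassembled via a constructor.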
9199
9200 std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
9201 const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
9202 bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
9203 {
9204 // Start traversing type hierarchy at the proper non-pointer types.
9205 const auto *type = &get_pointee_type(basetype);
9206
9207 std::string expr;
9208
9209 // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
9210 bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
9211 uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
9212 uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
9213
9214 for (uint32_t i = 0; i < count; i++)
9215 {
9216 uint32_t index = indices[i];
9217
9218 // Pointers
9219 if (ptr_chain && i == 0)
9220 {
9221 // Here, the pointer type will be decorated with an array stride.
9222 array_stride = get_decoration(basetype.self, DecorationArrayStride);
9223 if (!array_stride)
9224 SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
9225
9226 auto *constant = maybe_get<SPIRConstant>(index);
9227 if (constant)
9228 {
9229 // Constant array access.
9230 offset += constant->scalar() * array_stride;
9231 }
9232 else
9233 {
9234 // Dynamic array access.
9235 if (array_stride % word_stride)
9236 {
9237 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
9238 "of a 4-component vector. "
9239 "Likely culprit here is a float or vec2 array inside a push "
9240 "constant block which is std430. "
9241 "This cannot be flattened. Try using std140 layout instead.");
9242 }
9243
9244 expr += to_enclosed_expression(index);
9245 expr += " * ";
9246 expr += convert_to_string(array_stride / word_stride);
9247 expr += " + ";
9248 }
9249 }
9250 // Arrays
9251 else if (!type->array.empty())
9252 {
9253 auto *constant = maybe_get<SPIRConstant>(index);
9254 if (constant)
9255 {
9256 // Constant array access.
9257 offset += constant->scalar() * array_stride;
9258 }
9259 else
9260 {
9261 // Dynamic array access.
9262 if (array_stride % word_stride)
9263 {
9264 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
9265 "of a 4-component vector. "
9266 "Likely culprit here is a float or vec2 array inside a push "
9267 "constant block which is std430. "
9268 "This cannot be flattened. Try using std140 layout instead.");
9269 }
9270
9271 expr += to_enclosed_expression(index, false);
9272 expr += " * ";
9273 expr += convert_to_string(array_stride / word_stride);
9274 expr += " + ";
9275 }
9276
9277 uint32_t parent_type = type->parent_type;
9278 type = &get<SPIRType>(parent_type);
9279
9280 if (!type->array.empty())
9281 array_stride = get_decoration(parent_type, DecorationArrayStride);
9282 }
9283 // For structs, the index refers to a constant, which indexes into the members.
9284 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
9285 else if (type->basetype == SPIRType::Struct)
9286 {
9287 index = evaluate_constant_u32(index);
9288
9289 if (index >= type->member_types.size())
9290 SPIRV_CROSS_THROW("Member index is out of bounds!");
9291
9292 offset += type_struct_member_offset(*type, index);
9293
9294 auto &struct_type = *type;
9295 type = &get<SPIRType>(type->member_types[index]);
9296
9297 if (type->columns > 1)
9298 {
9299 matrix_stride = type_struct_member_matrix_stride(struct_type, index);
9300 row_major_matrix_needs_conversion =
9301 combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
9302 }
9303 else
9304 row_major_matrix_needs_conversion = false;
9305
9306 if (!type->array.empty())
9307 array_stride = type_struct_member_array_stride(struct_type, index);
9308 }
9309 // Matrix -> Vector
9310 else if (type->columns > 1)
9311 {
9312 auto *constant = maybe_get<SPIRConstant>(index);
9313 if (constant)
9314 {
9315 index = evaluate_constant_u32(index);
9316 offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
9317 }
9318 else
9319 {
9320 uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
9321 // Dynamic array access.
9322 if (indexing_stride % word_stride)
9323 {
9324 SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
9325 "4-component vector. "
9326 "Likely culprit here is a row-major matrix being accessed dynamically. "
9327 "This cannot be flattened. Try using std140 layout instead.");
9328 }
9329
9330 expr += to_enclosed_expression(index, false);
9331 expr += " * ";
9332 expr += convert_to_string(indexing_stride / word_stride);
9333 expr += " + ";
9334 }
9335
9336 type = &get<SPIRType>(type->parent_type);
9337 }
9338 // Vector -> Scalar
9339 else if (type->vecsize > 1)
9340 {
9341 auto *constant = maybe_get<SPIRConstant>(index);
9342 if (constant)
9343 {
9344 index = evaluate_constant_u32(index);
9345 offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
9346 }
9347 else
9348 {
9349 uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
9350
9351 // Dynamic array access.
9352 if (indexing_stride % word_stride)
9353 {
9354 SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
9355 "size of a 4-component vector. "
9356 "This cannot be flattened in legacy targets.");
9357 }
9358
9359 expr += to_enclosed_expression(index, false);
9360 expr += " * ";
9361 expr += convert_to_string(indexing_stride / word_stride);
9362 expr += " + ";
9363 }
9364
9365 type = &get<SPIRType>(type->parent_type);
9366 }
9367 else
9368 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
9369 }
9370
9371 if (need_transpose)
9372 *need_transpose = row_major_matrix_needs_conversion;
9373 if (out_matrix_stride)
9374 *out_matrix_stride = matrix_stride;
9375 if (out_array_stride)
9376 *out_array_stride = array_stride;
9377
9378 return std::make_pair(expr, offset);
9379 }
9380
9381 bool CompilerGLSL::should_dereference(uint32_t id)
9382 {
9383 const auto &type = expression_type(id);
9384 // Non-pointer expressions don't need to be dereferenced.
9385 if (!type.pointer)
9386 return false;
9387
9388 // Handles shouldn't be dereferenced either.
9389 if (!expression_is_lvalue(id))
9390 return false;
9391
9392 // If id is a variable but not a phi variable, we should not dereference it.
9393 if (auto *var = maybe_get<SPIRVariable>(id))
9394 return var->phi_variable;
9395
9396 // If id is an access chain, we should not dereference it.
9397 if (auto *expr = maybe_get<SPIRExpression>(id))
9398 return !expr->access_chain;
9399
9400 // Otherwise, we should dereference this pointer expression.
9401 return true;
9402 }
9403
9404 bool CompilerGLSL::should_forward(uint32_t id) const
9405 {
9406 	// If id is a variable, we will try to forward it regardless of the force_temporary check below.
9407 	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL.
9408 auto *var = maybe_get<SPIRVariable>(id);
9409 if (var && var->forwardable)
9410 return true;
9411
9412 // For debugging emit temporary variables for all expressions
9413 if (options.force_temporary)
9414 return false;
9415
9416 // Immutable expression can always be forwarded.
9417 if (is_immutable(id))
9418 return true;
9419
9420 return false;
9421 }
9422
9423 bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
9424 {
9425 // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
9426 return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
9427 }
9428
9429 void CompilerGLSL::track_expression_read(uint32_t id)
9430 {
9431 switch (ir.ids[id].get_type())
9432 {
9433 case TypeExpression:
9434 {
9435 auto &e = get<SPIRExpression>(id);
9436 for (auto implied_read : e.implied_read_expressions)
9437 track_expression_read(implied_read);
9438 break;
9439 }
9440
9441 case TypeAccessChain:
9442 {
9443 auto &e = get<SPIRAccessChain>(id);
9444 for (auto implied_read : e.implied_read_expressions)
9445 track_expression_read(implied_read);
9446 break;
9447 }
9448
9449 default:
9450 break;
9451 }
9452
9453 // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
9454 // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
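	// For illustration (hypothetical GLSL, names are made up): if a forwarded expression such as
	// "texture(uTex, vUV) * uScale" were read twice, the texture fetch would be stamped out twice at the
	// use sites; bumping the usage count below instead forces something like
	// "vec4 _15 = texture(uTex, vUV) * uScale;" followed by two cheap reads of _15.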
9455 if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
9456 {
9457 auto &v = expression_usage_counts[id];
9458 v++;
9459
9460 // If we create an expression outside a loop,
9461 // but access it inside a loop, we're implicitly reading it multiple times.
9462 // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
9463 // working inside the backend compiler.
9464 if (expression_read_implies_multiple_reads(id))
9465 v++;
9466
9467 if (v >= 2)
9468 {
9469 //if (v == 2)
9470 // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
9471
9472 forced_temporaries.insert(id);
9473 // Force a recompile after this pass to avoid forwarding this variable.
9474 force_recompile();
9475 }
9476 }
9477 }
9478
9479 bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
9480 {
9481 if (forced_temporaries.find(id) != end(forced_temporaries))
9482 return false;
9483
9484 for (uint32_t i = 0; i < num_args; i++)
9485 if (!should_forward(args[i]))
9486 return false;
9487
9488 // We need to forward globals as well.
9489 if (!pure)
9490 {
9491 for (auto global : global_variables)
9492 if (!should_forward(global))
9493 return false;
9494 for (auto aliased : aliased_variables)
9495 if (!should_forward(aliased))
9496 return false;
9497 }
9498
9499 return true;
9500 }
9501
9502 void CompilerGLSL::register_impure_function_call()
9503 {
9504 // Impure functions can modify globals and aliased variables, so invalidate them as well.
9505 for (auto global : global_variables)
9506 flush_dependees(get<SPIRVariable>(global));
9507 for (auto aliased : aliased_variables)
9508 flush_dependees(get<SPIRVariable>(aliased));
9509 }
9510
9511 void CompilerGLSL::register_call_out_argument(uint32_t id)
9512 {
9513 register_write(id);
9514
9515 auto *var = maybe_get<SPIRVariable>(id);
9516 if (var)
9517 flush_variable_declaration(var->self);
9518 }
9519
9520 string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
9521 {
9522 // These variables are always function local,
9523 // so make sure we emit the variable without storage qualifiers.
9524 // Some backends will inject custom variables locally in a function
9525 // with a storage qualifier which is not function-local.
9526 auto old_storage = var.storage;
9527 var.storage = StorageClassFunction;
9528 auto expr = variable_decl(var);
9529 var.storage = old_storage;
9530 return expr;
9531 }
9532
9533 void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
9534 {
9535 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9536 if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
9537 {
9538 auto &type = get<SPIRType>(var.basetype);
9539 auto &flags = get_decoration_bitset(var.self);
9540 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
9541 flushed_phi_variables.insert(var.self);
9542 }
9543 }
9544
9545 void CompilerGLSL::flush_variable_declaration(uint32_t id)
9546 {
9547 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9548 auto *var = maybe_get<SPIRVariable>(id);
9549 if (var && var->deferred_declaration)
9550 {
9551 string initializer;
9552 if (options.force_zero_initialized_variables &&
9553 (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
9554 var->storage == StorageClassPrivate) &&
9555 !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
9556 {
9557 initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
9558 }
9559
9560 statement(variable_decl_function_local(*var), initializer, ";");
9561 var->deferred_declaration = false;
9562 }
9563 if (var)
9564 {
9565 emit_variable_temporary_copies(*var);
9566 }
9567 }
9568
9569 bool CompilerGLSL::remove_duplicate_swizzle(string &op)
9570 {
9571 auto pos = op.find_last_of('.');
9572 if (pos == string::npos || pos == 0)
9573 return false;
9574
9575 string final_swiz = op.substr(pos + 1, string::npos);
9576
9577 if (backend.swizzle_is_function)
9578 {
9579 if (final_swiz.size() < 2)
9580 return false;
9581
9582 if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9583 final_swiz.erase(final_swiz.size() - 2, string::npos);
9584 else
9585 return false;
9586 }
9587
9588 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9589 // If so, and previous swizzle is of same length,
9590 // we can drop the final swizzle altogether.
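	// For example (illustrative): ".x", ".xy" and ".xyz" pass this check, while ".yx" or ".zw" do not,
	// since each component must match the identity order x, y, z, w starting from position 0.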
9591 for (uint32_t i = 0; i < final_swiz.size(); i++)
9592 {
9593 static const char expected[] = { 'x', 'y', 'z', 'w' };
9594 if (i >= 4 || final_swiz[i] != expected[i])
9595 return false;
9596 }
9597
9598 auto prevpos = op.find_last_of('.', pos - 1);
9599 if (prevpos == string::npos)
9600 return false;
9601
9602 prevpos++;
9603
9604 // Make sure there are only swizzles here ...
9605 for (auto i = prevpos; i < pos; i++)
9606 {
9607 if (op[i] < 'w' || op[i] > 'z')
9608 {
9609 // If swizzles are foo.xyz() like in C++ backend for example, check for that.
9610 if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
9611 break;
9612 return false;
9613 }
9614 }
9615
9616 // If original swizzle is large enough, just carve out the components we need.
9617 // E.g. foobar.wyx.xy will turn into foobar.wy.
9618 if (pos - prevpos >= final_swiz.size())
9619 {
9620 op.erase(prevpos + final_swiz.size(), string::npos);
9621
9622 // Add back the function call ...
9623 if (backend.swizzle_is_function)
9624 op += "()";
9625 }
9626 return true;
9627 }
9628
9629 // Optimizes away vector swizzles where we have something like
9630 // vec3 foo;
9631 // foo.xyz <-- swizzle expression does nothing.
9632 // This is a very common pattern after OpCompositeConstruct.
9633 bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
9634 {
9635 auto pos = op.find_last_of('.');
9636 if (pos == string::npos || pos == 0)
9637 return false;
9638
9639 string final_swiz = op.substr(pos + 1, string::npos);
9640
9641 if (backend.swizzle_is_function)
9642 {
9643 if (final_swiz.size() < 2)
9644 return false;
9645
9646 if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9647 final_swiz.erase(final_swiz.size() - 2, string::npos);
9648 else
9649 return false;
9650 }
9651
9652 	// Check if the final swizzle is of the form .x, .xy, .xyz, .xyzw or similar.
9653 	// If so, and the base vector has the same number of components,
9654 	// the swizzle is a no-op and can be dropped altogether.
9655 for (uint32_t i = 0; i < final_swiz.size(); i++)
9656 {
9657 static const char expected[] = { 'x', 'y', 'z', 'w' };
9658 if (i >= 4 || final_swiz[i] != expected[i])
9659 return false;
9660 }
9661
9662 auto &type = expression_type(base);
9663
9664 // Sanity checking ...
9665 assert(type.columns == 1 && type.array.empty());
9666
9667 if (type.vecsize == final_swiz.size())
9668 op.erase(pos, string::npos);
9669 return true;
9670 }
9671
9672 string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
9673 {
9674 ID base = 0;
9675 string op;
9676 string subop;
9677
9678 // Can only merge swizzles for vectors.
9679 auto &type = get<SPIRType>(return_type);
9680 bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
9681 bool swizzle_optimization = false;
9682
9683 for (uint32_t i = 0; i < length; i++)
9684 {
9685 auto *e = maybe_get<SPIRExpression>(elems[i]);
9686
9687 		// If we're merging another scalar which belongs to the same base
9688 		// object, just merge the swizzles so that we trigger as few reads of the base expression as possible!
9689 if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
9690 {
9691 // Only supposed to be used for vector swizzle -> scalar.
9692 assert(!e->expression.empty() && e->expression.front() == '.');
9693 subop += e->expression.substr(1, string::npos);
9694 swizzle_optimization = true;
9695 }
9696 else
9697 {
9698 // We'll likely end up with duplicated swizzles, e.g.
9699 // foobar.xyz.xyz from patterns like
9700 // OpVectorShuffle
9701 // OpCompositeExtract x 3
9702 // OpCompositeConstruct 3x + other scalar.
9703 // Just modify op in-place.
9704 if (swizzle_optimization)
9705 {
9706 if (backend.swizzle_is_function)
9707 subop += "()";
9708
9709 // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
9710 // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
9711 // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
9712 // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
9713 // Case 1:
9714 // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
9715 // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
9716 // Case 2:
9717 // foo.xyz: Duplicate swizzle won't kick in.
9718 // If foo is vec3, we can remove xyz, giving just foo.
9719 if (!remove_duplicate_swizzle(subop))
9720 remove_unity_swizzle(base, subop);
9721
9722 // Strips away redundant parens if we created them during component extraction.
9723 strip_enclosed_expression(subop);
9724 swizzle_optimization = false;
9725 op += subop;
9726 }
9727 else
9728 op += subop;
9729
9730 if (i)
9731 op += ", ";
9732
9733 bool uses_buffer_offset =
9734 type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
9735 subop = to_composite_constructor_expression(elems[i], uses_buffer_offset);
9736 }
9737
9738 base = e ? e->base_expression : ID(0);
9739 }
9740
9741 if (swizzle_optimization)
9742 {
9743 if (backend.swizzle_is_function)
9744 subop += "()";
9745
9746 if (!remove_duplicate_swizzle(subop))
9747 remove_unity_swizzle(base, subop);
9748 // Strips away redundant parens if we created them during component extraction.
9749 strip_enclosed_expression(subop);
9750 }
9751
9752 op += subop;
9753 return op;
9754 }
9755
9756 bool CompilerGLSL::skip_argument(uint32_t id) const
9757 {
9758 if (!combined_image_samplers.empty() || !options.vulkan_semantics)
9759 {
9760 auto &type = expression_type(id);
9761 if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
9762 return true;
9763 }
9764 return false;
9765 }
9766
9767 bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
9768 {
9769 // Do this with strings because we have a very clear pattern we can check for and it avoids
9770 // adding lots of special cases to the code emission.
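	// Rough illustration (hypothetical strings): with lhs == "i" and rhs == "i + 1" we can emit "i++;",
	// and with lhs == "accum" and rhs == "accum * weight" we can emit "accum *= weight;".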
9771 if (rhs.size() < lhs.size() + 3)
9772 return false;
9773
9774 // Do not optimize matrices. They are a bit awkward to reason about in general
9775 // (in which order does operation happen?), and it does not work on MSL anyways.
9776 if (type.vecsize > 1 && type.columns > 1)
9777 return false;
9778
9779 auto index = rhs.find(lhs);
9780 if (index != 0)
9781 return false;
9782
9783 // TODO: Shift operators, but it's not important for now.
9784 auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
9785 if (op != lhs.size() + 1)
9786 return false;
9787
9788 // Check that the op is followed by space. This excludes && and ||.
9789 if (rhs[op + 1] != ' ')
9790 return false;
9791
9792 char bop = rhs[op];
9793 auto expr = rhs.substr(lhs.size() + 3);
9794 // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
9795 // Find some common patterns which are equivalent.
9796 if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
9797 statement(lhs, bop, bop, ";");
9798 else
9799 statement(lhs, " ", bop, "= ", expr, ";");
9800 return true;
9801 }
9802
9803 void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
9804 {
9805 if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
9806 return;
9807
9808 assert(current_emitting_block);
9809 current_emitting_block->invalidate_expressions.push_back(expr);
9810 }
9811
9812 void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
9813 {
9814 current_emitting_block = █
9815 for (auto &op : block.ops)
9816 emit_instruction(op);
9817 current_emitting_block = nullptr;
9818 }
9819
9820 void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
9821 {
9822 // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
9823 // these will be marked as having suppressed usage tracking.
9824 // Our only concern is to make sure arithmetic operations are done in similar ways.
9825 if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
9826 forced_invariant_temporaries.count(expr.self) == 0)
9827 {
9828 forced_temporaries.insert(expr.self);
9829 forced_invariant_temporaries.insert(expr.self);
9830 force_recompile();
9831
9832 for (auto &dependent : expr.expression_dependencies)
9833 disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
9834 }
9835 }
9836
9837 void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
9838 {
9839 // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
9840 // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
9841 // in one translation unit, but not another, e.g. due to multiple use of an expression.
9842 // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
9843 // expressions to be temporaries.
9844 // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
9845 // for all reasonable uses of invariant.
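	// Illustrative scenario (hypothetical shader): two stages both compute "mvp * vec4(inPos, 1.0)" and write it
	// to an invariant gl_Position. If one translation unit forwards the expression inline while the other hoists
	// it to a temporary, the downstream compiler may evaluate the two forms differently and break invariance;
	// forcing the whole dependency chain to temporaries keeps the emitted code identical in both units.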
9846 if (!has_decoration(store_id, DecorationInvariant))
9847 return;
9848
9849 auto *expr = maybe_get<SPIRExpression>(value_id);
9850 if (!expr)
9851 return;
9852
9853 disallow_forwarding_in_expression_chain(*expr);
9854 }
9855
9856 void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
9857 {
9858 auto rhs = to_pointer_expression(rhs_expression);
9859
9860 // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
9861 if (!rhs.empty())
9862 {
9863 handle_store_to_invariant_variable(lhs_expression, rhs_expression);
9864
9865 if (!unroll_array_to_complex_store(lhs_expression, rhs_expression))
9866 {
9867 auto lhs = to_dereferenced_expression(lhs_expression);
9868 if (has_decoration(lhs_expression, DecorationNonUniform))
9869 convert_non_uniform_expression(lhs, lhs_expression);
9870
9871 // We might need to cast in order to store to a builtin.
9872 cast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
9873
9874 // Tries to optimize assignments like "<lhs> = <lhs> op expr".
9875 // While this is purely cosmetic, this is important for legacy ESSL where loop
9876 // variable increments must be in either i++ or i += const-expr.
9877 // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
9878 if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
9879 statement(lhs, " = ", rhs, ";");
9880 }
9881 register_write(lhs_expression);
9882 }
9883 }
9884
9885 uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
9886 {
9887 if (instr.length < 3)
9888 return 32;
9889
9890 auto *ops = stream(instr);
9891
9892 switch (instr.op)
9893 {
9894 case OpSConvert:
9895 case OpConvertSToF:
9896 case OpUConvert:
9897 case OpConvertUToF:
9898 case OpIEqual:
9899 case OpINotEqual:
9900 case OpSLessThan:
9901 case OpSLessThanEqual:
9902 case OpSGreaterThan:
9903 case OpSGreaterThanEqual:
9904 case OpULessThan:
9905 case OpULessThanEqual:
9906 case OpUGreaterThan:
9907 case OpUGreaterThanEqual:
9908 return expression_type(ops[2]).width;
9909
9910 default:
9911 {
9912 // We can look at result type which is more robust.
9913 auto *type = maybe_get<SPIRType>(ops[0]);
9914 if (type && type_is_integral(*type))
9915 return type->width;
9916 else
9917 return 32;
9918 }
9919 }
9920 }
9921
9922 uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
9923 {
9924 if (length < 1)
9925 return 32;
9926
9927 switch (op)
9928 {
9929 case GLSLstd450SAbs:
9930 case GLSLstd450SSign:
9931 case GLSLstd450UMin:
9932 case GLSLstd450SMin:
9933 case GLSLstd450UMax:
9934 case GLSLstd450SMax:
9935 case GLSLstd450UClamp:
9936 case GLSLstd450SClamp:
9937 case GLSLstd450FindSMsb:
9938 case GLSLstd450FindUMsb:
9939 return expression_type(ops[0]).width;
9940
9941 default:
9942 {
9943 // We don't need to care about other opcodes, just return 32.
9944 return 32;
9945 }
9946 }
9947 }
9948
9949 void CompilerGLSL::emit_instruction(const Instruction &instruction)
9950 {
9951 auto ops = stream(instruction);
9952 auto opcode = static_cast<Op>(instruction.op);
9953 uint32_t length = instruction.length;
9954
9955 #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
9956 #define GLSL_BOP_CAST(op, type) \
9957 emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
9958 #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
9959 #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
9960 #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
9961 #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
9962 #define GLSL_BFOP_CAST(op, type) \
9963 emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
9965 #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
9966
9967 // If we need to do implicit bitcasts, make sure we do it with the correct type.
9968 uint32_t integer_width = get_integer_width_for_instruction(instruction);
9969 auto int_type = to_signed_basetype(integer_width);
9970 auto uint_type = to_unsigned_basetype(integer_width);
9971
9972 switch (opcode)
9973 {
9974 // Dealing with memory
9975 case OpLoad:
9976 {
9977 uint32_t result_type = ops[0];
9978 uint32_t id = ops[1];
9979 uint32_t ptr = ops[2];
9980
9981 flush_variable_declaration(ptr);
9982
9983 // If we're loading from memory that cannot be changed by the shader,
9984 // just forward the expression directly to avoid needless temporaries.
9985 // If an expression is mutable and forwardable, we speculate that it is immutable.
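	// Illustrative example (hypothetical GLSL): a forwarded load of "ubo.color" is simply emitted inline at
	// each use site instead of as "vec4 _20 = ubo.color;", avoiding a pointless temporary for read-only data.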
9986 bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
9987
9988 // If loading a non-native row-major matrix, mark the expression as need_transpose.
9989 bool need_transpose = false;
9990 bool old_need_transpose = false;
9991
9992 auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
9993
9994 if (forward)
9995 {
9996 // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
9997 // taking the expression.
9998 if (ptr_expression && ptr_expression->need_transpose)
9999 {
10000 old_need_transpose = true;
10001 ptr_expression->need_transpose = false;
10002 need_transpose = true;
10003 }
10004 else if (is_non_native_row_major_matrix(ptr))
10005 need_transpose = true;
10006 }
10007
10008 // If we are forwarding this load,
10009 // don't register the read to access chain here, defer that to when we actually use the expression,
10010 // using the add_implied_read_expression mechanism.
10011 string expr;
10012
10013 bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
10014 bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
10015 if (forward || (!is_packed && !is_remapped))
10016 {
10017 // For the simple case, we do not need to deal with repacking.
10018 expr = to_dereferenced_expression(ptr, false);
10019 }
10020 else
10021 {
10022 // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
10023 // storing the expression to a temporary.
10024 expr = to_unpacked_expression(ptr);
10025 }
10026
10027 auto &type = get<SPIRType>(result_type);
10028 auto &expr_type = expression_type(ptr);
10029
10030 // If the expression has more vector components than the result type, insert
10031 // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
10032 // happen with e.g. the MSL backend replacing the type of an input variable.
10033 if (expr_type.vecsize > type.vecsize)
10034 expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
10035
10036 // We might need to cast in order to load from a builtin.
10037 cast_from_builtin_load(ptr, expr, type);
10038
10039 // We might be trying to load a gl_Position[N], where we should be
10040 // doing float4[](gl_in[i].gl_Position, ...) instead.
10041 // Similar workarounds are required for input arrays in tessellation.
10042 // Also, loading from gl_SampleMask array needs special unroll.
10043 unroll_array_from_complex_load(id, ptr, expr);
10044
10045 if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform))
10046 {
10047 // If we're loading something non-opaque, we need to handle non-uniform descriptor access.
10048 convert_non_uniform_expression(expr, ptr);
10049 }
10050
10051 if (forward && ptr_expression)
10052 ptr_expression->need_transpose = old_need_transpose;
10053
10054 bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
10055
10056 if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
10057 rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
10058
10059 // By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
10060 // However, if we try to load a complex, composite object from a flattened buffer,
10061 // we should avoid emitting the same code over and over and lower the result to a temporary.
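	// For example (illustrative): loading a whole struct or matrix out of a flattened push-constant block
	// expands to a multi-element constructor expression; reading that expression twice would duplicate the
	// entire constructor, so tracking usage and lowering it to a temporary is the better trade-off.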
10062 bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
10063
10064 SPIRExpression *e = nullptr;
10065 if (!forward && expression_is_non_value_type_array(ptr))
10066 {
10067 // Complicated load case where we need to make a copy of ptr, but we cannot, because
10068 // it is an array, and our backend does not support arrays as value types.
10069 // Emit the temporary, and copy it explicitly.
10070 e = &emit_uninitialized_temporary_expression(result_type, id);
10071 emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
10072 }
10073 else
10074 e = &emit_op(result_type, id, expr, forward, !usage_tracking);
10075
10076 e->need_transpose = need_transpose;
10077 register_read(id, ptr, forward);
10078
10079 if (forward)
10080 {
10081 // Pass through whether the result is of a packed type and the physical type ID.
10082 if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
10083 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10084 if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
10085 {
10086 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
10087 get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
10088 }
10089 }
10090 else
10091 {
10092 // This might have been set on an earlier compilation iteration, force it to be unset.
10093 unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10094 unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
10095 }
10096
10097 inherit_expression_dependencies(id, ptr);
10098 if (forward)
10099 add_implied_read_expression(*e, ptr);
10100 break;
10101 }
10102
10103 case OpInBoundsAccessChain:
10104 case OpAccessChain:
10105 case OpPtrAccessChain:
10106 {
10107 auto *var = maybe_get<SPIRVariable>(ops[2]);
10108 if (var)
10109 flush_variable_declaration(var->self);
10110
10111 // If the base is immutable, the access chain pointer must also be.
10112 // If an expression is mutable and forwardable, we speculate that it is immutable.
10113 AccessChainMeta meta;
10114 bool ptr_chain = opcode == OpPtrAccessChain;
10115 auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain);
10116
10117 auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
10118
10119 auto *backing_variable = maybe_get_backing_variable(ops[2]);
10120 expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
10121 expr.need_transpose = meta.need_transpose;
10122 expr.access_chain = true;
10123
10124 		// Mark the result as being packed. Some platforms handle packed vectors differently than non-packed ones.
10125 if (meta.storage_is_packed)
10126 set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
10127 if (meta.storage_physical_type != 0)
10128 set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
10129 if (meta.storage_is_invariant)
10130 set_decoration(ops[1], DecorationInvariant);
10131 if (meta.flattened_struct)
10132 flattened_structs[ops[1]] = true;
10133
10134 // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
10135 // temporary which could be subject to invalidation.
10136 		// Need to assume we're forwarded while calling inherit_expression_dependencies.
10137 forwarded_temporaries.insert(ops[1]);
10138 // The access chain itself is never forced to a temporary, but its dependencies might.
10139 suppressed_usage_tracking.insert(ops[1]);
10140
10141 for (uint32_t i = 2; i < length; i++)
10142 {
10143 inherit_expression_dependencies(ops[1], ops[i]);
10144 add_implied_read_expression(expr, ops[i]);
10145 }
10146
10147 // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
10148 // we're not forwarded after all.
10149 if (expr.expression_dependencies.empty())
10150 forwarded_temporaries.erase(ops[1]);
10151
10152 break;
10153 }
10154
10155 case OpStore:
10156 {
10157 auto *var = maybe_get<SPIRVariable>(ops[0]);
10158
10159 if (var && var->statically_assigned)
10160 var->static_expression = ops[1];
10161 else if (var && var->loop_variable && !var->loop_variable_enable)
10162 var->static_expression = ops[1];
10163 else if (var && var->remapped_variable && var->static_expression)
10164 {
10165 // Skip the write.
10166 }
10167 else if (flattened_structs.count(ops[0]))
10168 {
10169 store_flattened_struct(ops[0], ops[1]);
10170 register_write(ops[0]);
10171 }
10172 else
10173 {
10174 emit_store_statement(ops[0], ops[1]);
10175 }
10176
10177 // Storing a pointer results in a variable pointer, so we must conservatively assume
10178 // we can write through it.
10179 if (expression_type(ops[1]).pointer)
10180 register_write(ops[1]);
10181 break;
10182 }
10183
10184 case OpArrayLength:
10185 {
10186 uint32_t result_type = ops[0];
10187 uint32_t id = ops[1];
10188 auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
10189 if (has_decoration(ops[2], DecorationNonUniform))
10190 convert_non_uniform_expression(e, ops[2]);
10191 set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
10192 true);
10193 break;
10194 }
10195
10196 // Function calls
10197 case OpFunctionCall:
10198 {
10199 uint32_t result_type = ops[0];
10200 uint32_t id = ops[1];
10201 uint32_t func = ops[2];
10202 const auto *arg = &ops[3];
10203 length -= 3;
10204
10205 auto &callee = get<SPIRFunction>(func);
10206 auto &return_type = get<SPIRType>(callee.return_type);
10207 bool pure = function_is_pure(callee);
10208
10209 bool callee_has_out_variables = false;
10210 bool emit_return_value_as_argument = false;
10211
10212 // Invalidate out variables passed to functions since they can be OpStore'd to.
10213 for (uint32_t i = 0; i < length; i++)
10214 {
10215 if (callee.arguments[i].write_count)
10216 {
10217 register_call_out_argument(arg[i]);
10218 callee_has_out_variables = true;
10219 }
10220
10221 flush_variable_declaration(arg[i]);
10222 }
10223
10224 if (!return_type.array.empty() && !backend.can_return_array)
10225 {
10226 callee_has_out_variables = true;
10227 emit_return_value_as_argument = true;
10228 }
10229
10230 if (!pure)
10231 register_impure_function_call();
10232
10233 string funexpr;
10234 SmallVector<string> arglist;
10235 funexpr += to_name(func) + "(";
10236
10237 if (emit_return_value_as_argument)
10238 {
10239 statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
10240 arglist.push_back(to_name(id));
10241 }
10242
10243 for (uint32_t i = 0; i < length; i++)
10244 {
10245 // Do not pass in separate images or samplers if we're remapping
10246 // to combined image samplers.
10247 if (skip_argument(arg[i]))
10248 continue;
10249
10250 arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
10251 }
10252
10253 for (auto &combined : callee.combined_parameters)
10254 {
10255 auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
10256 auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
10257 arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
10258 }
10259
10260 append_global_func_args(callee, length, arglist);
10261
10262 funexpr += merge(arglist);
10263 funexpr += ")";
10264
10265 // Check for function call constraints.
10266 check_function_call_constraints(arg, length);
10267
10268 if (return_type.basetype != SPIRType::Void)
10269 {
10270 // If the function actually writes to an out variable,
10271 // take the conservative route and do not forward.
10272 // The problem is that we might not read the function
10273 // result (and emit the function) before an out variable
10274 			// is read (a common case when the return value is ignored).
10275 			// In order to avoid starting to track invalid variables,
10276 // just avoid the forwarding problem altogether.
10277 bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
10278 (forced_temporaries.find(id) == end(forced_temporaries));
10279
10280 if (emit_return_value_as_argument)
10281 {
10282 statement(funexpr, ";");
10283 set<SPIRExpression>(id, to_name(id), result_type, true);
10284 }
10285 else
10286 emit_op(result_type, id, funexpr, forward);
10287
10288 // Function calls are implicit loads from all variables in question.
10289 // Set dependencies for them.
10290 for (uint32_t i = 0; i < length; i++)
10291 register_read(id, arg[i], forward);
10292
10293 // If we're going to forward the temporary result,
10294 // put dependencies on every variable that must not change.
10295 if (forward)
10296 register_global_read_dependencies(callee, id);
10297 }
10298 else
10299 statement(funexpr, ";");
10300
10301 break;
10302 }
10303
10304 // Composite munging
10305 case OpCompositeConstruct:
10306 {
10307 uint32_t result_type = ops[0];
10308 uint32_t id = ops[1];
10309 const auto *const elems = &ops[2];
10310 length -= 2;
10311
10312 bool forward = true;
10313 for (uint32_t i = 0; i < length; i++)
10314 forward = forward && should_forward(elems[i]);
10315
10316 auto &out_type = get<SPIRType>(result_type);
10317 auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
10318
10319 // Only splat if we have vector constructors.
10320 // Arrays and structs must be initialized properly in full.
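		// Illustrative example (hypothetical GLSL): constructing a vec4 from four uses of the same scalar "s"
		// can be splatted to "vec4(s)", or swizzle-splatted to roughly "s.xxxx" on backends that allow
		// swizzling scalars, whereas arrays and structs must always list every member explicitly.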
10321 bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
10322
10323 bool splat = false;
10324 bool swizzle_splat = false;
10325
10326 if (in_type)
10327 {
10328 splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
10329 swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
10330
10331 if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
10332 {
10333 				// As a special case, we cannot swizzle literal integers.
10334 swizzle_splat = false;
10335 }
10336 }
10337
10338 if (splat || swizzle_splat)
10339 {
10340 uint32_t input = elems[0];
10341 for (uint32_t i = 0; i < length; i++)
10342 {
10343 if (input != elems[i])
10344 {
10345 splat = false;
10346 swizzle_splat = false;
10347 }
10348 }
10349 }
10350
10351 if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
10352 forward = false;
10353 if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
10354 forward = false;
10355 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10356 forward = false;
10357
10358 string constructor_op;
10359 if (backend.use_initializer_list && composite)
10360 {
10361 			bool needs_trailing_bracket = false;
10362 // Only use this path if we are building composites.
10363 // This path cannot be used for arithmetic.
10364 if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
10365 constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
10366 else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
10367 {
10368 // MSL path. Array constructor is baked into type here, do not use _constructor variant.
10369 constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10370 				needs_trailing_bracket = true;
10371 }
10372 constructor_op += "{ ";
10373
10374 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10375 constructor_op += "0";
10376 else if (splat)
10377 constructor_op += to_unpacked_expression(elems[0]);
10378 else
10379 constructor_op += build_composite_combiner(result_type, elems, length);
10380 constructor_op += " }";
10381 			if (needs_trailing_bracket)
10382 constructor_op += ")";
10383 }
10384 else if (swizzle_splat && !composite)
10385 {
10386 constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
10387 }
10388 else
10389 {
10390 constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10391 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10392 constructor_op += "0";
10393 else if (splat)
10394 constructor_op += to_unpacked_expression(elems[0]);
10395 else
10396 constructor_op += build_composite_combiner(result_type, elems, length);
10397 constructor_op += ")";
10398 }
10399
10400 if (!constructor_op.empty())
10401 {
10402 emit_op(result_type, id, constructor_op, forward);
10403 for (uint32_t i = 0; i < length; i++)
10404 inherit_expression_dependencies(id, elems[i]);
10405 }
10406 break;
10407 }
10408
10409 case OpVectorInsertDynamic:
10410 {
10411 uint32_t result_type = ops[0];
10412 uint32_t id = ops[1];
10413 uint32_t vec = ops[2];
10414 uint32_t comp = ops[3];
10415 uint32_t index = ops[4];
10416
10417 flush_variable_declaration(vec);
10418
10419 // Make a copy, then use access chain to store the variable.
10420 statement(declare_temporary(result_type, id), to_expression(vec), ";");
10421 set<SPIRExpression>(id, to_name(id), result_type, true);
10422 auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
10423 statement(chain, " = ", to_unpacked_expression(comp), ";");
10424 break;
10425 }
10426
10427 case OpVectorExtractDynamic:
10428 {
10429 uint32_t result_type = ops[0];
10430 uint32_t id = ops[1];
10431
10432 auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
10433 emit_op(result_type, id, expr, should_forward(ops[2]));
10434 inherit_expression_dependencies(id, ops[2]);
10435 inherit_expression_dependencies(id, ops[3]);
10436 break;
10437 }
10438
10439 case OpCompositeExtract:
10440 {
10441 uint32_t result_type = ops[0];
10442 uint32_t id = ops[1];
10443 length -= 3;
10444
10445 auto &type = get<SPIRType>(result_type);
10446
10447 // We can only split the expression here if our expression is forwarded as a temporary.
10448 bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
10449
10450 // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
10451 auto &composite_type = expression_type(ops[2]);
10452 bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
10453 if (composite_type_is_complex)
10454 allow_base_expression = false;
10455
10456 // Packed expressions or physical ID mapped expressions cannot be split up.
10457 if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
10458 has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
10459 allow_base_expression = false;
10460
10461 // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
10462 // into the base expression.
10463 if (is_non_native_row_major_matrix(ops[2]))
10464 allow_base_expression = false;
10465
10466 AccessChainMeta meta;
10467 SPIRExpression *e = nullptr;
10468 auto *c = maybe_get<SPIRConstant>(ops[2]);
10469
10470 if (c && !c->specialization && !composite_type_is_complex)
10471 {
10472 auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length);
10473 e = &emit_op(result_type, id, expr, true, true);
10474 }
10475 else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
10476 {
10477 // Only apply this optimization if result is scalar.
10478
10479 // We want to split the access chain from the base.
10480 // This is so we can later combine different CompositeExtract results
10481 // with CompositeConstruct without emitting code like
10482 //
10483 // vec3 temp = texture(...).xyz
10484 // vec4(temp.x, temp.y, temp.z, 1.0).
10485 //
10486 // when we actually wanted to emit this
10487 // vec4(texture(...).xyz, 1.0).
10488 //
10489 // Including the base will prevent this and would trigger multiple reads
10490 // from expression causing it to be forced to an actual temporary in GLSL.
10491 auto expr = access_chain_internal(ops[2], &ops[3], length,
10492 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
10493 ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
10494 e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
10495 inherit_expression_dependencies(id, ops[2]);
10496 e->base_expression = ops[2];
10497 }
10498 else
10499 {
10500 auto expr = access_chain_internal(ops[2], &ops[3], length,
10501 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
10502 e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
10503 inherit_expression_dependencies(id, ops[2]);
10504 }
10505
10506 // Pass through some meta information to the loaded expression.
10507 // We can still end up loading a buffer type to a variable, then CompositeExtract from it
10508 // instead of loading everything through an access chain.
10509 e->need_transpose = meta.need_transpose;
10510 if (meta.storage_is_packed)
10511 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10512 if (meta.storage_physical_type != 0)
10513 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
10514 if (meta.storage_is_invariant)
10515 set_decoration(id, DecorationInvariant);
10516
10517 break;
10518 }
10519
10520 case OpCompositeInsert:
10521 {
10522 uint32_t result_type = ops[0];
10523 uint32_t id = ops[1];
10524 uint32_t obj = ops[2];
10525 uint32_t composite = ops[3];
10526 const auto *elems = &ops[4];
10527 length -= 4;
10528
10529 flush_variable_declaration(composite);
10530
10531 // Make a copy, then use access chain to store the variable.
10532 statement(declare_temporary(result_type, id), to_expression(composite), ";");
10533 set<SPIRExpression>(id, to_name(id), result_type, true);
10534 auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
10535 statement(chain, " = ", to_unpacked_expression(obj), ";");
10536
10537 break;
10538 }
10539
10540 case OpCopyMemory:
10541 {
10542 uint32_t lhs = ops[0];
10543 uint32_t rhs = ops[1];
10544 if (lhs != rhs)
10545 {
10546 uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET];
10547 if (!tmp_id)
10548 tmp_id = ir.increase_bound_by(1);
10549 uint32_t tmp_type_id = expression_type(rhs).parent_type;
10550
10551 EmbeddedInstruction fake_load, fake_store;
10552 fake_load.op = OpLoad;
10553 fake_load.length = 3;
10554 fake_load.ops.push_back(tmp_type_id);
10555 fake_load.ops.push_back(tmp_id);
10556 fake_load.ops.push_back(rhs);
10557
10558 fake_store.op = OpStore;
10559 fake_store.length = 2;
10560 fake_store.ops.push_back(lhs);
10561 fake_store.ops.push_back(tmp_id);
10562
10563 // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible.
10564 // Synthesize a fake Load and Store pair for CopyMemory.
10565 emit_instruction(fake_load);
10566 emit_instruction(fake_store);
10567 }
10568 break;
10569 }
10570
10571 case OpCopyLogical:
10572 {
10573 // This is used for copying object of different types, arrays and structs.
10574 // We need to unroll the copy, element-by-element.
10575 uint32_t result_type = ops[0];
10576 uint32_t id = ops[1];
10577 uint32_t rhs = ops[2];
10578
10579 emit_uninitialized_temporary_expression(result_type, id);
10580 emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
10581 break;
10582 }
10583
10584 case OpCopyObject:
10585 {
10586 uint32_t result_type = ops[0];
10587 uint32_t id = ops[1];
10588 uint32_t rhs = ops[2];
10589 bool pointer = get<SPIRType>(result_type).pointer;
10590
10591 auto *chain = maybe_get<SPIRAccessChain>(rhs);
10592 auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
10593 if (chain)
10594 {
10595 // Cannot lower to a SPIRExpression, just copy the object.
10596 auto &e = set<SPIRAccessChain>(id, *chain);
10597 e.self = id;
10598 }
10599 else if (imgsamp)
10600 {
10601 // Cannot lower to a SPIRExpression, just copy the object.
10602 // GLSL does not currently use this type and will never get here, but MSL does.
10603 // Handled here instead of CompilerMSL for better integration and general handling,
10604 // and in case GLSL or other subclasses require it in the future.
10605 auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
10606 e.self = id;
10607 }
10608 else if (expression_is_lvalue(rhs) && !pointer)
10609 {
10610 // Need a copy.
10611 // For pointer types, we copy the pointer itself.
10612 statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
10613 set<SPIRExpression>(id, to_name(id), result_type, true);
10614 }
10615 else
10616 {
10617 // RHS expression is immutable, so just forward it.
10618 			// Copying these things really makes no sense, but
10619 			// it seems to be allowed anyway.
10620 auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
10621 if (pointer)
10622 {
10623 auto *var = maybe_get_backing_variable(rhs);
10624 e.loaded_from = var ? var->self : ID(0);
10625 }
10626
10627 // If we're copying an access chain, need to inherit the read expressions.
10628 auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
10629 if (rhs_expr)
10630 {
10631 e.implied_read_expressions = rhs_expr->implied_read_expressions;
10632 e.expression_dependencies = rhs_expr->expression_dependencies;
10633 }
10634 }
10635 break;
10636 }
10637
10638 case OpVectorShuffle:
10639 {
10640 uint32_t result_type = ops[0];
10641 uint32_t id = ops[1];
10642 uint32_t vec0 = ops[2];
10643 uint32_t vec1 = ops[3];
10644 const auto *elems = &ops[4];
10645 length -= 4;
10646
10647 auto &type0 = expression_type(vec0);
10648
10649 // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
10650 // or in our case, T(0).
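		// Illustrative example (hypothetical): shuffling vec4 a and vec4 b with indices (4, 1, 0xFFFFFFFF)
		// produces roughly "vec3(b.x, a.y, 0.0)", where the undefined lane is filled with a zero constant below.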
10651 bool shuffle = false;
10652 for (uint32_t i = 0; i < length; i++)
10653 if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
10654 shuffle = true;
10655
10656 // Cannot use swizzles with packed expressions, force shuffle path.
10657 if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
10658 shuffle = true;
10659
10660 string expr;
10661 bool should_fwd, trivial_forward;
10662
10663 if (shuffle)
10664 {
10665 should_fwd = should_forward(vec0) && should_forward(vec1);
10666 trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
10667
10668 // Constructor style and shuffling from two different vectors.
10669 SmallVector<string> args;
10670 for (uint32_t i = 0; i < length; i++)
10671 {
10672 if (elems[i] == 0xffffffffu)
10673 {
10674 // Use a constant 0 here.
10675 // We could use the first component or similar, but then we risk propagating
10676 // a value we might not need, and bog down codegen.
10677 SPIRConstant c;
10678 c.constant_type = type0.parent_type;
10679 assert(type0.parent_type != ID(0));
10680 args.push_back(constant_expression(c));
10681 }
10682 else if (elems[i] >= type0.vecsize)
10683 args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
10684 else
10685 args.push_back(to_extract_component_expression(vec0, elems[i]));
10686 }
10687 expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
10688 }
10689 else
10690 {
10691 should_fwd = should_forward(vec0);
10692 trivial_forward = should_suppress_usage_tracking(vec0);
10693
10694 // We only source from first vector, so can use swizzle.
10695 // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
10696 expr += to_enclosed_unpacked_expression(vec0);
10697 expr += ".";
10698 for (uint32_t i = 0; i < length; i++)
10699 {
10700 assert(elems[i] != 0xffffffffu);
10701 expr += index_to_swizzle(elems[i]);
10702 }
10703
10704 if (backend.swizzle_is_function && length > 1)
10705 expr += "()";
10706 }
10707
10708 // A shuffle is trivial in that it doesn't actually *do* anything.
10709 // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
10710
10711 emit_op(result_type, id, expr, should_fwd, trivial_forward);
10712
10713 inherit_expression_dependencies(id, vec0);
10714 if (vec0 != vec1)
10715 inherit_expression_dependencies(id, vec1);
10716 break;
10717 }
10718
10719 // ALU
10720 case OpIsNan:
10721 GLSL_UFOP(isnan);
10722 break;
10723
10724 case OpIsInf:
10725 GLSL_UFOP(isinf);
10726 break;
10727
10728 case OpSNegate:
10729 case OpFNegate:
10730 GLSL_UOP(-);
10731 break;
10732
10733 case OpIAdd:
10734 {
10735 // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
10736 auto type = get<SPIRType>(ops[0]).basetype;
10737 GLSL_BOP_CAST(+, type);
10738 break;
10739 }
10740
10741 case OpFAdd:
10742 GLSL_BOP(+);
10743 break;
10744
10745 case OpISub:
10746 {
10747 auto type = get<SPIRType>(ops[0]).basetype;
10748 GLSL_BOP_CAST(-, type);
10749 break;
10750 }
10751
10752 case OpFSub:
10753 GLSL_BOP(-);
10754 break;
10755
10756 case OpIMul:
10757 {
10758 auto type = get<SPIRType>(ops[0]).basetype;
10759 GLSL_BOP_CAST(*, type);
10760 break;
10761 }
10762
10763 case OpVectorTimesMatrix:
10764 case OpMatrixTimesVector:
10765 {
10766 // If the matrix needs transpose, just flip the multiply order.
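		// Illustrative identity being exploited here: in GLSL, transpose(M) * v == v * M and
		// v * transpose(M) == M * v, so a pending transpose can be folded away by swapping the operands.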
10767 auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
10768 if (e && e->need_transpose)
10769 {
10770 e->need_transpose = false;
10771 string expr;
10772
10773 if (opcode == OpMatrixTimesVector)
10774 expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
10775 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10776 else
10777 expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10778 to_enclosed_unpacked_expression(ops[2]));
10779
10780 bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10781 emit_op(ops[0], ops[1], expr, forward);
10782 e->need_transpose = true;
10783 inherit_expression_dependencies(ops[1], ops[2]);
10784 inherit_expression_dependencies(ops[1], ops[3]);
10785 }
10786 else
10787 GLSL_BOP(*);
10788 break;
10789 }
10790
10791 case OpMatrixTimesMatrix:
10792 {
10793 auto *a = maybe_get<SPIRExpression>(ops[2]);
10794 auto *b = maybe_get<SPIRExpression>(ops[3]);
10795
10796 // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
10797 // a^T * b^T = (b * a)^T.
10798 if (a && b && a->need_transpose && b->need_transpose)
10799 {
10800 a->need_transpose = false;
10801 b->need_transpose = false;
10802 auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10803 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10804 bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10805 auto &e = emit_op(ops[0], ops[1], expr, forward);
10806 e.need_transpose = true;
10807 a->need_transpose = true;
10808 b->need_transpose = true;
10809 inherit_expression_dependencies(ops[1], ops[2]);
10810 inherit_expression_dependencies(ops[1], ops[3]);
10811 }
10812 else
10813 GLSL_BOP(*);
10814
10815 break;
10816 }
10817
10818 case OpFMul:
10819 case OpMatrixTimesScalar:
10820 case OpVectorTimesScalar:
10821 GLSL_BOP(*);
10822 break;
10823
10824 case OpOuterProduct:
10825 GLSL_BFOP(outerProduct);
10826 break;
10827
10828 case OpDot:
10829 GLSL_BFOP(dot);
10830 break;
10831
10832 case OpTranspose:
10833 if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
10834 {
10835 // transpose() is not available, so instead, flip need_transpose,
10836 // which can later be turned into an emulated transpose op by
10837 // convert_row_major_matrix(), if necessary.
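			// Illustrative consequence: a pattern like "transpose(m) * v" on a legacy target never emits
			// transpose(); the flag flipped below lets OpMatrixTimesVector simply swap operands to "v * m".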
10838 uint32_t result_type = ops[0];
10839 uint32_t result_id = ops[1];
10840 uint32_t input = ops[2];
10841
10842 // Force need_transpose to false temporarily to prevent
10843 // to_expression() from doing the transpose.
10844 bool need_transpose = false;
10845 auto *input_e = maybe_get<SPIRExpression>(input);
10846 if (input_e)
10847 swap(need_transpose, input_e->need_transpose);
10848
10849 bool forward = should_forward(input);
10850 auto &e = emit_op(result_type, result_id, to_expression(input), forward);
10851 e.need_transpose = !need_transpose;
10852
10853 // Restore the old need_transpose flag.
10854 if (input_e)
10855 input_e->need_transpose = need_transpose;
10856 }
10857 else
10858 GLSL_UFOP(transpose);
10859 break;
10860
10861 case OpSRem:
10862 {
10863 uint32_t result_type = ops[0];
10864 uint32_t result_id = ops[1];
10865 uint32_t op0 = ops[2];
10866 uint32_t op1 = ops[3];
10867
10868 // Needs special handling.
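		// OpSRem takes its sign from the dividend, hence the expansion "a - b * (a / b)" using C-style
		// truncated integer division, e.g. (illustrative) -7 srem 3 == -7 - 3 * (-7 / 3) == -1.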
10869 bool forward = should_forward(op0) && should_forward(op1);
10870 auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
10871 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
10872
10873 emit_op(result_type, result_id, expr, forward);
10874 inherit_expression_dependencies(result_id, op0);
10875 inherit_expression_dependencies(result_id, op1);
10876 break;
10877 }
10878
10879 case OpSDiv:
10880 GLSL_BOP_CAST(/, int_type);
10881 break;
10882
10883 case OpUDiv:
10884 GLSL_BOP_CAST(/, uint_type);
10885 break;
10886
10887 case OpIAddCarry:
10888 case OpISubBorrow:
10889 {
10890 if (options.es && options.version < 310)
10891 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
10892 else if (!options.es && options.version < 400)
10893 SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
10894
10895 uint32_t result_type = ops[0];
10896 uint32_t result_id = ops[1];
10897 uint32_t op0 = ops[2];
10898 uint32_t op1 = ops[3];
10899 auto &type = get<SPIRType>(result_type);
10900 emit_uninitialized_temporary_expression(result_type, result_id);
10901 const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
10902
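		// Emitted roughly as (member names depend on the result struct):
		//   _res.first = uaddCarry(a, b, _res.second);
		// The carry/borrow comes back through the trailing out parameter, which maps to
		// member 1 of the SPIR-V result struct.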
10903 statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
10904 to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
10905 break;
10906 }
10907
10908 case OpUMulExtended:
10909 case OpSMulExtended:
10910 {
10911 if (options.es && options.version < 310)
10912 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
10913 else if (!options.es && options.version < 400)
10914 			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
10915
10916 uint32_t result_type = ops[0];
10917 uint32_t result_id = ops[1];
10918 uint32_t op0 = ops[2];
10919 uint32_t op1 = ops[3];
10920 auto &type = get<SPIRType>(result_type);
10921 emit_uninitialized_temporary_expression(result_type, result_id);
10922 const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
10923
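		// GLSL's umulExtended(x, y, msb, lsb) returns the high bits first, while the SPIR-V
		// result struct is { low, high }, so member 1 (high) is passed before member 0 (low),
		// roughly: umulExtended(a, b, _res.second, _res.first);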
10924 statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
10925 to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
10926 break;
10927 }
10928
10929 case OpFDiv:
10930 GLSL_BOP(/);
10931 break;
10932
10933 case OpShiftRightLogical:
10934 GLSL_BOP_CAST(>>, uint_type);
10935 break;
10936
10937 case OpShiftRightArithmetic:
10938 GLSL_BOP_CAST(>>, int_type);
10939 break;
10940
10941 case OpShiftLeftLogical:
10942 {
10943 auto type = get<SPIRType>(ops[0]).basetype;
10944 GLSL_BOP_CAST(<<, type);
10945 break;
10946 }
10947
10948 case OpBitwiseOr:
10949 {
10950 auto type = get<SPIRType>(ops[0]).basetype;
10951 GLSL_BOP_CAST(|, type);
10952 break;
10953 }
10954
10955 case OpBitwiseXor:
10956 {
10957 auto type = get<SPIRType>(ops[0]).basetype;
10958 GLSL_BOP_CAST(^, type);
10959 break;
10960 }
10961
10962 case OpBitwiseAnd:
10963 {
10964 auto type = get<SPIRType>(ops[0]).basetype;
10965 GLSL_BOP_CAST(&, type);
10966 break;
10967 }
10968
10969 case OpNot:
10970 GLSL_UOP(~);
10971 break;
10972
10973 case OpUMod:
10974 GLSL_BOP_CAST(%, uint_type);
10975 break;
10976
10977 case OpSMod:
10978 GLSL_BOP_CAST(%, int_type);
10979 break;
10980
10981 case OpFMod:
10982 GLSL_BFOP(mod);
10983 break;
10984
10985 case OpFRem:
10986 {
10987 if (is_legacy())
10988 SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
10989 "needed for legacy.");
10990
10991 uint32_t result_type = ops[0];
10992 uint32_t result_id = ops[1];
10993 uint32_t op0 = ops[2];
10994 uint32_t op1 = ops[3];
10995
10996 // Needs special handling.
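		// OpFRem keeps the sign of the dividend (trunc-based), unlike OpFMod which maps to
		// GLSL mod() and follows the sign of the divisor. Emitted roughly as:
		//   a - b * trunc(a / b)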
10997 bool forward = should_forward(op0) && should_forward(op1);
10998 auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
10999 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
11000
11001 emit_op(result_type, result_id, expr, forward);
11002 inherit_expression_dependencies(result_id, op0);
11003 inherit_expression_dependencies(result_id, op1);
11004 break;
11005 }
11006
11007 // Relational
11008 case OpAny:
11009 GLSL_UFOP(any);
11010 break;
11011
11012 case OpAll:
11013 GLSL_UFOP(all);
11014 break;
11015
11016 case OpSelect:
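		// SPIR-V OpSelect is (condition, true_value, false_value), while the mix()-style
		// helper takes (false_value, true_value, condition), hence the reversed operand order.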
11017 emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
11018 break;
11019
11020 case OpLogicalOr:
11021 {
11022 // No vector variant in GLSL for logical OR.
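		// e.g. for bvec2 operands this is unrolled roughly as: bvec2(a.x || b.x, a.y || b.y).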
11023 auto result_type = ops[0];
11024 auto id = ops[1];
11025 auto &type = get<SPIRType>(result_type);
11026
11027 if (type.vecsize > 1)
11028 emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
11029 else
11030 GLSL_BOP(||);
11031 break;
11032 }
11033
11034 case OpLogicalAnd:
11035 {
11036 // No vector variant in GLSL for logical AND.
11037 auto result_type = ops[0];
11038 auto id = ops[1];
11039 auto &type = get<SPIRType>(result_type);
11040
11041 if (type.vecsize > 1)
11042 emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
11043 else
11044 GLSL_BOP(&&);
11045 break;
11046 }
11047
11048 case OpLogicalNot:
11049 {
11050 auto &type = get<SPIRType>(ops[0]);
11051 if (type.vecsize > 1)
11052 GLSL_UFOP(not );
11053 else
11054 GLSL_UOP(!);
11055 break;
11056 }
11057
11058 case OpIEqual:
11059 {
11060 if (expression_type(ops[2]).vecsize > 1)
11061 GLSL_BFOP_CAST(equal, int_type);
11062 else
11063 GLSL_BOP_CAST(==, int_type);
11064 break;
11065 }
11066
11067 case OpLogicalEqual:
11068 case OpFOrdEqual:
11069 {
11070 if (expression_type(ops[2]).vecsize > 1)
11071 GLSL_BFOP(equal);
11072 else
11073 GLSL_BOP(==);
11074 break;
11075 }
11076
11077 case OpINotEqual:
11078 {
11079 if (expression_type(ops[2]).vecsize > 1)
11080 GLSL_BFOP_CAST(notEqual, int_type);
11081 else
11082 GLSL_BOP_CAST(!=, int_type);
11083 break;
11084 }
11085
11086 case OpLogicalNotEqual:
11087 case OpFOrdNotEqual:
11088 {
11089 if (expression_type(ops[2]).vecsize > 1)
11090 GLSL_BFOP(notEqual);
11091 else
11092 GLSL_BOP(!=);
11093 break;
11094 }
11095
11096 case OpUGreaterThan:
11097 case OpSGreaterThan:
11098 {
11099 auto type = opcode == OpUGreaterThan ? uint_type : int_type;
11100 if (expression_type(ops[2]).vecsize > 1)
11101 GLSL_BFOP_CAST(greaterThan, type);
11102 else
11103 GLSL_BOP_CAST(>, type);
11104 break;
11105 }
11106
11107 case OpFOrdGreaterThan:
11108 {
11109 if (expression_type(ops[2]).vecsize > 1)
11110 GLSL_BFOP(greaterThan);
11111 else
11112 GLSL_BOP(>);
11113 break;
11114 }
11115
11116 case OpUGreaterThanEqual:
11117 case OpSGreaterThanEqual:
11118 {
11119 auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
11120 if (expression_type(ops[2]).vecsize > 1)
11121 GLSL_BFOP_CAST(greaterThanEqual, type);
11122 else
11123 GLSL_BOP_CAST(>=, type);
11124 break;
11125 }
11126
11127 case OpFOrdGreaterThanEqual:
11128 {
11129 if (expression_type(ops[2]).vecsize > 1)
11130 GLSL_BFOP(greaterThanEqual);
11131 else
11132 GLSL_BOP(>=);
11133 break;
11134 }
11135
11136 case OpULessThan:
11137 case OpSLessThan:
11138 {
11139 auto type = opcode == OpULessThan ? uint_type : int_type;
11140 if (expression_type(ops[2]).vecsize > 1)
11141 GLSL_BFOP_CAST(lessThan, type);
11142 else
11143 GLSL_BOP_CAST(<, type);
11144 break;
11145 }
11146
11147 case OpFOrdLessThan:
11148 {
11149 if (expression_type(ops[2]).vecsize > 1)
11150 GLSL_BFOP(lessThan);
11151 else
11152 GLSL_BOP(<);
11153 break;
11154 }
11155
11156 case OpULessThanEqual:
11157 case OpSLessThanEqual:
11158 {
11159 auto type = opcode == OpULessThanEqual ? uint_type : int_type;
11160 if (expression_type(ops[2]).vecsize > 1)
11161 GLSL_BFOP_CAST(lessThanEqual, type);
11162 else
11163 GLSL_BOP_CAST(<=, type);
11164 break;
11165 }
11166
11167 case OpFOrdLessThanEqual:
11168 {
11169 if (expression_type(ops[2]).vecsize > 1)
11170 GLSL_BFOP(lessThanEqual);
11171 else
11172 GLSL_BOP(<=);
11173 break;
11174 }
11175
11176 // Conversion
11177 case OpSConvert:
11178 case OpConvertSToF:
11179 case OpUConvert:
11180 case OpConvertUToF:
11181 {
11182 auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
11183 uint32_t result_type = ops[0];
11184 uint32_t id = ops[1];
11185
11186 auto &type = get<SPIRType>(result_type);
11187 auto &arg_type = expression_type(ops[2]);
11188 auto func = type_to_glsl_constructor(type);
11189
11190 if (arg_type.width < type.width || type_is_floating_point(type))
11191 emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
11192 else
11193 emit_unary_func_op(result_type, id, ops[2], func.c_str());
11194 break;
11195 }
11196
11197 case OpConvertFToU:
11198 case OpConvertFToS:
11199 {
11200 // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
11201 uint32_t result_type = ops[0];
11202 uint32_t id = ops[1];
11203 auto &type = get<SPIRType>(result_type);
11204 auto expected_type = type;
11205 auto &float_type = expression_type(ops[2]);
11206 expected_type.basetype =
11207 opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
11208
11209 auto func = type_to_glsl_constructor(expected_type);
11210 emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
11211 break;
11212 }
11213
11214 case OpFConvert:
11215 {
11216 uint32_t result_type = ops[0];
11217 uint32_t id = ops[1];
11218
11219 auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
11220 emit_unary_func_op(result_type, id, ops[2], func.c_str());
11221 break;
11222 }
11223
11224 case OpBitcast:
11225 {
11226 uint32_t result_type = ops[0];
11227 uint32_t id = ops[1];
11228 uint32_t arg = ops[2];
11229
11230 if (!emit_complex_bitcast(result_type, id, arg))
11231 {
11232 auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
11233 emit_unary_func_op(result_type, id, arg, op.c_str());
11234 }
11235 break;
11236 }
11237
11238 case OpQuantizeToF16:
11239 {
11240 uint32_t result_type = ops[0];
11241 uint32_t id = ops[1];
11242 uint32_t arg = ops[2];
11243
11244 string op;
11245 auto &type = get<SPIRType>(result_type);
11246
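		// Quantize by round-tripping through 16-bit packing, e.g. for a vec2:
		//   unpackHalf2x16(packHalf2x16(v))
		// Wider vectors are split into two halves below.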
11247 switch (type.vecsize)
11248 {
11249 case 1:
11250 op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
11251 break;
11252 case 2:
11253 op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
11254 break;
11255 case 3:
11256 {
11257 auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
11258 auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
11259 op = join("vec3(", op0, ", ", op1, ")");
11260 break;
11261 }
11262 case 4:
11263 {
11264 auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
11265 auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
11266 op = join("vec4(", op0, ", ", op1, ")");
11267 break;
11268 }
11269 default:
11270 SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
11271 }
11272
11273 emit_op(result_type, id, op, should_forward(arg));
11274 inherit_expression_dependencies(id, arg);
11275 break;
11276 }
11277
11278 // Derivatives
11279 case OpDPdx:
11280 GLSL_UFOP(dFdx);
11281 if (is_legacy_es())
11282 require_extension_internal("GL_OES_standard_derivatives");
11283 register_control_dependent_expression(ops[1]);
11284 break;
11285
11286 case OpDPdy:
11287 GLSL_UFOP(dFdy);
11288 if (is_legacy_es())
11289 require_extension_internal("GL_OES_standard_derivatives");
11290 register_control_dependent_expression(ops[1]);
11291 break;
11292
11293 case OpDPdxFine:
11294 GLSL_UFOP(dFdxFine);
11295 if (options.es)
11296 {
11297 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11298 }
11299 if (options.version < 450)
11300 require_extension_internal("GL_ARB_derivative_control");
11301 register_control_dependent_expression(ops[1]);
11302 break;
11303
11304 case OpDPdyFine:
11305 GLSL_UFOP(dFdyFine);
11306 if (options.es)
11307 {
11308 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11309 }
11310 if (options.version < 450)
11311 require_extension_internal("GL_ARB_derivative_control");
11312 register_control_dependent_expression(ops[1]);
11313 break;
11314
11315 case OpDPdxCoarse:
11316 if (options.es)
11317 {
11318 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11319 }
11320 GLSL_UFOP(dFdxCoarse);
11321 if (options.version < 450)
11322 require_extension_internal("GL_ARB_derivative_control");
11323 register_control_dependent_expression(ops[1]);
11324 break;
11325
11326 case OpDPdyCoarse:
11327 GLSL_UFOP(dFdyCoarse);
11328 if (options.es)
11329 {
11330 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11331 }
11332 if (options.version < 450)
11333 require_extension_internal("GL_ARB_derivative_control");
11334 register_control_dependent_expression(ops[1]);
11335 break;
11336
11337 case OpFwidth:
11338 GLSL_UFOP(fwidth);
11339 if (is_legacy_es())
11340 require_extension_internal("GL_OES_standard_derivatives");
11341 register_control_dependent_expression(ops[1]);
11342 break;
11343
11344 case OpFwidthCoarse:
11345 GLSL_UFOP(fwidthCoarse);
11346 if (options.es)
11347 {
11348 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11349 }
11350 if (options.version < 450)
11351 require_extension_internal("GL_ARB_derivative_control");
11352 register_control_dependent_expression(ops[1]);
11353 break;
11354
11355 case OpFwidthFine:
11356 GLSL_UFOP(fwidthFine);
11357 if (options.es)
11358 {
11359 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11360 }
11361 if (options.version < 450)
11362 require_extension_internal("GL_ARB_derivative_control");
11363 register_control_dependent_expression(ops[1]);
11364 break;
11365
11366 // Bitfield
11367 case OpBitFieldInsert:
11368 {
11369 emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
11370 break;
11371 }
11372
11373 case OpBitFieldSExtract:
11374 {
11375 emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
11376 SPIRType::Int, SPIRType::Int);
11377 break;
11378 }
11379
11380 case OpBitFieldUExtract:
11381 {
11382 emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
11383 SPIRType::Int, SPIRType::Int);
11384 break;
11385 }
11386
11387 case OpBitReverse:
11388 // BitReverse does not have issues with sign since result type must match input type.
11389 GLSL_UFOP(bitfieldReverse);
11390 break;
11391
11392 case OpBitCount:
11393 {
11394 auto basetype = expression_type(ops[2]).basetype;
11395 emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
11396 break;
11397 }
11398
11399 // Atomics
11400 case OpAtomicExchange:
11401 {
11402 uint32_t result_type = ops[0];
11403 uint32_t id = ops[1];
11404 uint32_t ptr = ops[2];
11405 // Ignore semantics for now, probably only relevant to CL.
11406 uint32_t val = ops[5];
11407 const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11408
11409 emit_atomic_func_op(result_type, id, ptr, val, op);
11410 break;
11411 }
11412
11413 case OpAtomicCompareExchange:
11414 {
11415 uint32_t result_type = ops[0];
11416 uint32_t id = ops[1];
11417 uint32_t ptr = ops[2];
11418 uint32_t val = ops[6];
11419 uint32_t comp = ops[7];
11420 const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
11421
11422 emit_atomic_func_op(result_type, id, ptr, comp, val, op);
11423 break;
11424 }
11425
11426 case OpAtomicLoad:
11427 {
11428 		// In plain GLSL, we have no atomic loads, so emulate this with an atomic add of 0 and hope the compiler figures it out.
11429 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
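		// e.g. an OpAtomicLoad from a uint SSBO member becomes roughly: atomicAdd(ptr, 0u)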
11430 auto &type = expression_type(ops[2]);
11431 forced_temporaries.insert(ops[1]);
11432 bool atomic_image = check_atomic_image(ops[2]);
11433 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11434 (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11435 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11436 const char *increment = unsigned_type ? "0u" : "0";
11437 emit_op(ops[0], ops[1],
11438 join(op, "(",
11439 to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
11440 flush_all_atomic_capable_variables();
11441 break;
11442 }
11443
11444 case OpAtomicStore:
11445 {
11446 // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
11447 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
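		// e.g. "OpAtomicStore %ptr ... %val" becomes roughly: atomicExchange(ptr, val);
		// with the return value deliberately ignored.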
11448 uint32_t ptr = ops[0];
11449 // Ignore semantics for now, probably only relevant to CL.
11450 uint32_t val = ops[3];
11451 const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11452 statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");");
11453 flush_all_atomic_capable_variables();
11454 break;
11455 }
11456
11457 case OpAtomicIIncrement:
11458 case OpAtomicIDecrement:
11459 {
11460 forced_temporaries.insert(ops[1]);
11461 auto &type = expression_type(ops[2]);
11462 if (type.storage == StorageClassAtomicCounter)
11463 {
11464 // Legacy GLSL stuff, not sure if this is relevant to support.
11465 if (opcode == OpAtomicIIncrement)
11466 GLSL_UFOP(atomicCounterIncrement);
11467 else
11468 GLSL_UFOP(atomicCounterDecrement);
11469 }
11470 else
11471 {
11472 bool atomic_image = check_atomic_image(ops[2]);
11473 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11474 (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11475 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11476
11477 const char *increment = nullptr;
11478 if (opcode == OpAtomicIIncrement && unsigned_type)
11479 increment = "1u";
11480 else if (opcode == OpAtomicIIncrement)
11481 increment = "1";
11482 else if (unsigned_type)
11483 increment = "uint(-1)";
11484 else
11485 increment = "-1";
11486
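			// e.g. OpAtomicIDecrement on a uint target becomes roughly atomicAdd(ptr, uint(-1)),
			// since adding the wrapped-around value 0xffffffffu is equivalent to subtracting 1.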
11487 emit_op(ops[0], ops[1],
11488 join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
11489 }
11490
11491 flush_all_atomic_capable_variables();
11492 break;
11493 }
11494
11495 case OpAtomicIAdd:
11496 {
11497 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11498 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11499 break;
11500 }
11501
11502 case OpAtomicISub:
11503 {
11504 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11505 forced_temporaries.insert(ops[1]);
11506 auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
11507 emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
11508 flush_all_atomic_capable_variables();
11509 break;
11510 }
11511
11512 case OpAtomicSMin:
11513 case OpAtomicUMin:
11514 {
11515 const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
11516 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11517 break;
11518 }
11519
11520 case OpAtomicSMax:
11521 case OpAtomicUMax:
11522 {
11523 const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
11524 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11525 break;
11526 }
11527
11528 case OpAtomicAnd:
11529 {
11530 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
11531 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11532 break;
11533 }
11534
11535 case OpAtomicOr:
11536 {
11537 const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
11538 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11539 break;
11540 }
11541
11542 case OpAtomicXor:
11543 {
11544 const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
11545 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11546 break;
11547 }
11548
11549 // Geometry shaders
11550 case OpEmitVertex:
11551 statement("EmitVertex();");
11552 break;
11553
11554 case OpEndPrimitive:
11555 statement("EndPrimitive();");
11556 break;
11557
11558 case OpEmitStreamVertex:
11559 {
11560 if (options.es)
11561 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11562 else if (!options.es && options.version < 400)
11563 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11564
11565 auto stream_expr = to_expression(ops[0]);
11566 if (expression_type(ops[0]).basetype != SPIRType::Int)
11567 stream_expr = join("int(", stream_expr, ")");
11568 statement("EmitStreamVertex(", stream_expr, ");");
11569 break;
11570 }
11571
11572 case OpEndStreamPrimitive:
11573 {
11574 if (options.es)
11575 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11576 else if (!options.es && options.version < 400)
11577 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11578
11579 auto stream_expr = to_expression(ops[0]);
11580 if (expression_type(ops[0]).basetype != SPIRType::Int)
11581 stream_expr = join("int(", stream_expr, ")");
11582 statement("EndStreamPrimitive(", stream_expr, ");");
11583 break;
11584 }
11585
11586 // Textures
11587 case OpImageSampleExplicitLod:
11588 case OpImageSampleProjExplicitLod:
11589 case OpImageSampleDrefExplicitLod:
11590 case OpImageSampleProjDrefExplicitLod:
11591 case OpImageSampleImplicitLod:
11592 case OpImageSampleProjImplicitLod:
11593 case OpImageSampleDrefImplicitLod:
11594 case OpImageSampleProjDrefImplicitLod:
11595 case OpImageFetch:
11596 case OpImageGather:
11597 case OpImageDrefGather:
11598 		// Gets a bit hairy, so move this to a separate function.
11599 emit_texture_op(instruction, false);
11600 break;
11601
11602 case OpImageSparseSampleExplicitLod:
11603 case OpImageSparseSampleProjExplicitLod:
11604 case OpImageSparseSampleDrefExplicitLod:
11605 case OpImageSparseSampleProjDrefExplicitLod:
11606 case OpImageSparseSampleImplicitLod:
11607 case OpImageSparseSampleProjImplicitLod:
11608 case OpImageSparseSampleDrefImplicitLod:
11609 case OpImageSparseSampleProjDrefImplicitLod:
11610 case OpImageSparseFetch:
11611 case OpImageSparseGather:
11612 case OpImageSparseDrefGather:
11613 		// Gets a bit hairy, so move this to a separate function.
11614 emit_texture_op(instruction, true);
11615 break;
11616
11617 case OpImageSparseTexelsResident:
11618 if (options.es)
11619 			SPIRV_CROSS_THROW("Sparse feedback is not supported in OpenGL ES.");
11620 require_extension_internal("GL_ARB_sparse_texture2");
11621 emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
11622 break;
11623
11624 case OpImage:
11625 {
11626 uint32_t result_type = ops[0];
11627 uint32_t id = ops[1];
11628
11629 // Suppress usage tracking.
11630 auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
11631
11632 // When using the image, we need to know which variable it is actually loaded from.
11633 auto *var = maybe_get_backing_variable(ops[2]);
11634 e.loaded_from = var ? var->self : ID(0);
11635 break;
11636 }
11637
11638 case OpImageQueryLod:
11639 {
11640 const char *op = nullptr;
11641 if (!options.es && options.version < 400)
11642 {
11643 require_extension_internal("GL_ARB_texture_query_lod");
11644 // For some reason, the ARB spec is all-caps.
11645 op = "textureQueryLOD";
11646 }
11647 else if (options.es)
11648 SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
11649 else
11650 op = "textureQueryLod";
11651
11652 auto sampler_expr = to_expression(ops[2]);
11653 if (has_decoration(ops[2], DecorationNonUniform))
11654 {
11655 if (maybe_get_backing_variable(ops[2]))
11656 convert_non_uniform_expression(sampler_expr, ops[2]);
11657 else if (*backend.nonuniform_qualifier != '\0')
11658 sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")");
11659 }
11660
11661 bool forward = should_forward(ops[3]);
11662 emit_op(ops[0], ops[1],
11663 join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"),
11664 forward);
11665 inherit_expression_dependencies(ops[1], ops[2]);
11666 inherit_expression_dependencies(ops[1], ops[3]);
11667 register_control_dependent_expression(ops[1]);
11668 break;
11669 }
11670
11671 case OpImageQueryLevels:
11672 {
11673 uint32_t result_type = ops[0];
11674 uint32_t id = ops[1];
11675
11676 if (!options.es && options.version < 430)
11677 require_extension_internal("GL_ARB_texture_query_levels");
11678 if (options.es)
11679 SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
11680
11681 auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
11682 auto &restype = get<SPIRType>(ops[0]);
11683 expr = bitcast_expression(restype, SPIRType::Int, expr);
11684 emit_op(result_type, id, expr, true);
11685 break;
11686 }
11687
11688 case OpImageQuerySamples:
11689 {
11690 auto &type = expression_type(ops[2]);
11691 uint32_t result_type = ops[0];
11692 uint32_t id = ops[1];
11693
11694 string expr;
11695 if (type.image.sampled == 2)
11696 expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")");
11697 else
11698 expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
11699
11700 auto &restype = get<SPIRType>(ops[0]);
11701 expr = bitcast_expression(restype, SPIRType::Int, expr);
11702 emit_op(result_type, id, expr, true);
11703 break;
11704 }
11705
11706 case OpSampledImage:
11707 {
11708 uint32_t result_type = ops[0];
11709 uint32_t id = ops[1];
11710 emit_sampled_image_op(result_type, id, ops[2], ops[3]);
11711 inherit_expression_dependencies(id, ops[2]);
11712 inherit_expression_dependencies(id, ops[3]);
11713 break;
11714 }
11715
11716 case OpImageQuerySizeLod:
11717 {
11718 uint32_t result_type = ops[0];
11719 uint32_t id = ops[1];
11720 uint32_t img = ops[2];
11721
11722 std::string fname = "textureSize";
11723 if (is_legacy_desktop())
11724 {
11725 auto &type = expression_type(img);
11726 auto &imgtype = get<SPIRType>(type.self);
11727 fname = legacy_tex_op(fname, imgtype, img);
11728 }
11729 else if (is_legacy_es())
11730 SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
11731
11732 auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
11733 bitcast_expression(SPIRType::Int, ops[3]), ")");
11734 auto &restype = get<SPIRType>(ops[0]);
11735 expr = bitcast_expression(restype, SPIRType::Int, expr);
11736 emit_op(result_type, id, expr, true);
11737 break;
11738 }
11739
11740 // Image load/store
11741 case OpImageRead:
11742 case OpImageSparseRead:
11743 {
11744 // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
11745 // not adding the proper qualifiers.
11746 // If it turns out we need to read the image after all, remove the qualifier and recompile.
11747 auto *var = maybe_get_backing_variable(ops[2]);
11748 if (var)
11749 {
11750 auto &flags = ir.meta[var->self].decoration.decoration_flags;
11751 if (flags.get(DecorationNonReadable))
11752 {
11753 flags.clear(DecorationNonReadable);
11754 force_recompile();
11755 }
11756 }
11757
11758 uint32_t result_type = ops[0];
11759 uint32_t id = ops[1];
11760
11761 bool pure;
11762 string imgexpr;
11763 auto &type = expression_type(ops[2]);
11764
11765 if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
11766 {
11767 if (type.image.ms)
11768 SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
11769
11770 auto itr =
11771 find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
11772
11773 if (itr == end(pls_inputs))
11774 {
11775 // For non-PLS inputs, we rely on subpass type remapping information to get it right
11776 // since ImageRead always returns 4-component vectors and the backing type is opaque.
11777 if (!var->remapped_components)
11778 SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
11779 imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
11780 }
11781 else
11782 {
11783 // PLS input could have different number of components than what the SPIR expects, swizzle to
11784 // the appropriate vector size.
11785 uint32_t components = pls_format_to_components(itr->format);
11786 imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
11787 }
11788 pure = true;
11789 }
11790 else if (type.image.dim == DimSubpassData)
11791 {
11792 if (var && subpass_input_is_framebuffer_fetch(var->self))
11793 {
11794 imgexpr = to_expression(var->self);
11795 }
11796 else if (options.vulkan_semantics)
11797 {
11798 // With Vulkan semantics, use the proper Vulkan GLSL construct.
11799 if (type.image.ms)
11800 {
11801 uint32_t operands = ops[4];
11802 if (operands != ImageOperandsSampleMask || length != 6)
11803 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11804 "operand mask was used.");
11805
11806 uint32_t samples = ops[5];
11807 imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")");
11808 }
11809 else
11810 imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")");
11811 }
11812 else
11813 {
11814 if (type.image.ms)
11815 {
11816 uint32_t operands = ops[4];
11817 if (operands != ImageOperandsSampleMask || length != 6)
11818 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11819 "operand mask was used.");
11820
11821 uint32_t samples = ops[5];
11822 imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
11823 to_expression(samples), ")");
11824 }
11825 else
11826 {
11827 // Implement subpass loads via texture barrier style sampling.
11828 imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
11829 }
11830 }
11831 imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
11832 pure = true;
11833 }
11834 else
11835 {
11836 bool sparse = opcode == OpImageSparseRead;
11837 uint32_t sparse_code_id = 0;
11838 uint32_t sparse_texel_id = 0;
11839 if (sparse)
11840 emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
11841
11842 // imageLoad only accepts int coords, not uint.
11843 auto coord_expr = to_expression(ops[3]);
11844 auto target_coord_type = expression_type(ops[3]);
11845 target_coord_type.basetype = SPIRType::Int;
11846 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
11847
11848 // Plain image load/store.
11849 if (sparse)
11850 {
11851 if (type.image.ms)
11852 {
11853 uint32_t operands = ops[4];
11854 if (operands != ImageOperandsSampleMask || length != 6)
11855 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11856 "operand mask was used.");
11857
11858 uint32_t samples = ops[5];
11859 statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
11860 coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
11861 }
11862 else
11863 {
11864 statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
11865 coord_expr, ", ", to_expression(sparse_texel_id), ");");
11866 }
11867 imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
11868 to_expression(sparse_texel_id), ")");
11869 }
11870 else
11871 {
11872 if (type.image.ms)
11873 {
11874 uint32_t operands = ops[4];
11875 if (operands != ImageOperandsSampleMask || length != 6)
11876 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11877 "operand mask was used.");
11878
11879 uint32_t samples = ops[5];
11880 imgexpr =
11881 join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
11882 }
11883 else
11884 imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")");
11885 }
11886
11887 if (!sparse)
11888 imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
11889 pure = false;
11890 }
11891
11892 if (var && var->forwardable)
11893 {
11894 bool forward = forced_temporaries.find(id) == end(forced_temporaries);
11895 auto &e = emit_op(result_type, id, imgexpr, forward);
11896
11897 // We only need to track dependencies if we're reading from image load/store.
11898 if (!pure)
11899 {
11900 e.loaded_from = var->self;
11901 if (forward)
11902 var->dependees.push_back(id);
11903 }
11904 }
11905 else
11906 emit_op(result_type, id, imgexpr, false);
11907
11908 inherit_expression_dependencies(id, ops[2]);
11909 if (type.image.ms)
11910 inherit_expression_dependencies(id, ops[5]);
11911 break;
11912 }
11913
11914 case OpImageTexelPointer:
11915 {
11916 uint32_t result_type = ops[0];
11917 uint32_t id = ops[1];
11918
11919 auto coord_expr = to_expression(ops[3]);
11920 auto target_coord_type = expression_type(ops[3]);
11921 target_coord_type.basetype = SPIRType::Int;
11922 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
11923
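		// There is no GLSL equivalent of a texel pointer; record "image, coord" as the
		// expression so a later imageAtomic*() call can splice it in as its leading arguments.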
11924 auto expr = join(to_expression(ops[2]), ", ", coord_expr);
11925 auto &e = set<SPIRExpression>(id, expr, result_type, true);
11926
11927 // When using the pointer, we need to know which variable it is actually loaded from.
11928 auto *var = maybe_get_backing_variable(ops[2]);
11929 e.loaded_from = var ? var->self : ID(0);
11930 inherit_expression_dependencies(id, ops[3]);
11931 break;
11932 }
11933
11934 case OpImageWrite:
11935 {
11936 // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
11937 // not adding the proper qualifiers.
11938 // If it turns out we need to write to the image after all, remove the qualifier and recompile.
11939 auto *var = maybe_get_backing_variable(ops[0]);
11940 if (var)
11941 {
11942 auto &flags = ir.meta[var->self].decoration.decoration_flags;
11943 if (flags.get(DecorationNonWritable))
11944 {
11945 flags.clear(DecorationNonWritable);
11946 force_recompile();
11947 }
11948 }
11949
11950 auto &type = expression_type(ops[0]);
11951 auto &value_type = expression_type(ops[2]);
11952 auto store_type = value_type;
11953 store_type.vecsize = 4;
11954
11955 // imageStore only accepts int coords, not uint.
11956 auto coord_expr = to_expression(ops[1]);
11957 auto target_coord_type = expression_type(ops[1]);
11958 target_coord_type.basetype = SPIRType::Int;
11959 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
11960
11961 if (type.image.ms)
11962 {
11963 uint32_t operands = ops[3];
11964 if (operands != ImageOperandsSampleMask || length != 5)
11965 SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
11966 uint32_t samples = ops[4];
11967 statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
11968 remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
11969 }
11970 else
11971 statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ",
11972 remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
11973
11974 if (var && variable_storage_is_aliased(*var))
11975 flush_all_aliased_variables();
11976 break;
11977 }
11978
11979 case OpImageQuerySize:
11980 {
11981 auto &type = expression_type(ops[2]);
11982 uint32_t result_type = ops[0];
11983 uint32_t id = ops[1];
11984
11985 if (type.basetype == SPIRType::Image)
11986 {
11987 string expr;
11988 if (type.image.sampled == 2)
11989 {
11990 if (!options.es && options.version < 430)
11991 require_extension_internal("GL_ARB_shader_image_size");
11992 else if (options.es && options.version < 310)
11993 SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
11994
11995 // The size of an image is always constant.
11996 expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")");
11997 }
11998 else
11999 {
12000 // This path is hit for samplerBuffers and multisampled images which do not have LOD.
12001 std::string fname = "textureSize";
12002 if (is_legacy())
12003 {
12004 auto &imgtype = get<SPIRType>(type.self);
12005 fname = legacy_tex_op(fname, imgtype, ops[2]);
12006 }
12007 expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
12008 }
12009
12010 auto &restype = get<SPIRType>(ops[0]);
12011 expr = bitcast_expression(restype, SPIRType::Int, expr);
12012 emit_op(result_type, id, expr, true);
12013 }
12014 else
12015 SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
12016 break;
12017 }
12018
12019 // Compute
12020 case OpControlBarrier:
12021 case OpMemoryBarrier:
12022 {
12023 uint32_t execution_scope = 0;
12024 uint32_t memory;
12025 uint32_t semantics;
12026
12027 if (opcode == OpMemoryBarrier)
12028 {
12029 memory = evaluate_constant_u32(ops[0]);
12030 semantics = evaluate_constant_u32(ops[1]);
12031 }
12032 else
12033 {
12034 execution_scope = evaluate_constant_u32(ops[0]);
12035 memory = evaluate_constant_u32(ops[1]);
12036 semantics = evaluate_constant_u32(ops[2]);
12037 }
12038
12039 if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
12040 {
12041 // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
12042 if (opcode != OpControlBarrier)
12043 {
12044 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
12045 }
12046 else
12047 {
12048 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
12049 }
12050 }
12051
12052 if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
12053 {
12054 // Control shaders only have barriers, and it implies memory barriers.
12055 if (opcode == OpControlBarrier)
12056 statement("barrier();");
12057 break;
12058 }
12059
12060 // We only care about these flags, acquire/release and friends are not relevant to GLSL.
12061 semantics = mask_relevant_memory_semantics(semantics);
12062
12063 if (opcode == OpMemoryBarrier)
12064 {
12065 // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
12066 // does what we need, so we avoid redundant barriers.
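			// e.g. OpMemoryBarrier(Workgroup, WorkgroupMemory) immediately followed by
			// OpControlBarrier(Workgroup, Workgroup, WorkgroupMemory | AcquireRelease)
			// collapses to a single "barrier();", since the control barrier already covers it.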
12067 const Instruction *next = get_next_instruction_in_block(instruction);
12068 if (next && next->op == OpControlBarrier)
12069 {
12070 auto *next_ops = stream(*next);
12071 uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
12072 uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
12073 next_semantics = mask_relevant_memory_semantics(next_semantics);
12074
12075 bool memory_scope_covered = false;
12076 if (next_memory == memory)
12077 memory_scope_covered = true;
12078 else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
12079 {
12080 // If we only care about workgroup memory, either Device or Workgroup scope is fine,
12081 // scope does not have to match.
12082 if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
12083 (memory == ScopeDevice || memory == ScopeWorkgroup))
12084 {
12085 memory_scope_covered = true;
12086 }
12087 }
12088 else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
12089 {
12090 // The control barrier has device scope, but the memory barrier just has workgroup scope.
12091 memory_scope_covered = true;
12092 }
12093
12094 // If we have the same memory scope, and all memory types are covered, we're good.
12095 if (memory_scope_covered && (semantics & next_semantics) == semantics)
12096 break;
12097 }
12098 }
12099
12100 // We are synchronizing some memory or syncing execution,
12101 // so we cannot forward any loads beyond the memory barrier.
12102 if (semantics || opcode == OpControlBarrier)
12103 {
12104 assert(current_emitting_block);
12105 flush_control_dependent_expressions(current_emitting_block->self);
12106 flush_all_active_variables();
12107 }
12108
12109 if (memory == ScopeWorkgroup) // Only need to consider memory within a group
12110 {
12111 if (semantics == MemorySemanticsWorkgroupMemoryMask)
12112 {
12113 // OpControlBarrier implies a memory barrier for shared memory as well.
12114 bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
12115 if (!implies_shared_barrier)
12116 statement("memoryBarrierShared();");
12117 }
12118 else if (semantics != 0)
12119 statement("groupMemoryBarrier();");
12120 }
12121 else if (memory == ScopeSubgroup)
12122 {
12123 const uint32_t all_barriers =
12124 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
12125
12126 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
12127 {
12128 // These are not relevant for GLSL, but assume it means memoryBarrier().
12129 // memoryBarrier() does everything, so no need to test anything else.
12130 statement("subgroupMemoryBarrier();");
12131 }
12132 else if ((semantics & all_barriers) == all_barriers)
12133 {
12134 // Short-hand instead of emitting 3 barriers.
12135 statement("subgroupMemoryBarrier();");
12136 }
12137 else
12138 {
12139 // Pick out individual barriers.
12140 if (semantics & MemorySemanticsWorkgroupMemoryMask)
12141 statement("subgroupMemoryBarrierShared();");
12142 if (semantics & MemorySemanticsUniformMemoryMask)
12143 statement("subgroupMemoryBarrierBuffer();");
12144 if (semantics & MemorySemanticsImageMemoryMask)
12145 statement("subgroupMemoryBarrierImage();");
12146 }
12147 }
12148 else
12149 {
12150 const uint32_t all_barriers =
12151 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
12152
12153 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
12154 {
12155 // These are not relevant for GLSL, but assume it means memoryBarrier().
12156 // memoryBarrier() does everything, so no need to test anything else.
12157 statement("memoryBarrier();");
12158 }
12159 else if ((semantics & all_barriers) == all_barriers)
12160 {
12161 				// Short-hand instead of emitting 3 barriers.
12162 statement("memoryBarrier();");
12163 }
12164 else
12165 {
12166 // Pick out individual barriers.
12167 if (semantics & MemorySemanticsWorkgroupMemoryMask)
12168 statement("memoryBarrierShared();");
12169 if (semantics & MemorySemanticsUniformMemoryMask)
12170 statement("memoryBarrierBuffer();");
12171 if (semantics & MemorySemanticsImageMemoryMask)
12172 statement("memoryBarrierImage();");
12173 }
12174 }
12175
12176 if (opcode == OpControlBarrier)
12177 {
12178 if (execution_scope == ScopeSubgroup)
12179 statement("subgroupBarrier();");
12180 else
12181 statement("barrier();");
12182 }
12183 break;
12184 }
12185
12186 case OpExtInst:
12187 {
12188 uint32_t extension_set = ops[2];
12189
12190 if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
12191 {
12192 emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12193 }
12194 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
12195 {
12196 emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12197 }
12198 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
12199 {
12200 emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12201 }
12202 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
12203 {
12204 emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12205 }
12206 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
12207 {
12208 emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12209 }
12210 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
12211 {
12212 break; // Ignore SPIR-V debug information extended instructions.
12213 }
12214 else
12215 {
12216 statement("// unimplemented ext op ", instruction.op);
12217 break;
12218 }
12219
12220 break;
12221 }
12222
12223 // Legacy sub-group stuff ...
12224 case OpSubgroupBallotKHR:
12225 {
12226 uint32_t result_type = ops[0];
12227 uint32_t id = ops[1];
12228 string expr;
12229 expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
12230 emit_op(result_type, id, expr, should_forward(ops[2]));
12231
12232 require_extension_internal("GL_ARB_shader_ballot");
12233 inherit_expression_dependencies(id, ops[2]);
12234 register_control_dependent_expression(ops[1]);
12235 break;
12236 }
12237
12238 case OpSubgroupFirstInvocationKHR:
12239 {
12240 uint32_t result_type = ops[0];
12241 uint32_t id = ops[1];
12242 emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
12243
12244 require_extension_internal("GL_ARB_shader_ballot");
12245 register_control_dependent_expression(ops[1]);
12246 break;
12247 }
12248
12249 case OpSubgroupReadInvocationKHR:
12250 {
12251 uint32_t result_type = ops[0];
12252 uint32_t id = ops[1];
12253 emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
12254
12255 require_extension_internal("GL_ARB_shader_ballot");
12256 register_control_dependent_expression(ops[1]);
12257 break;
12258 }
12259
12260 case OpSubgroupAllKHR:
12261 {
12262 uint32_t result_type = ops[0];
12263 uint32_t id = ops[1];
12264 emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
12265
12266 require_extension_internal("GL_ARB_shader_group_vote");
12267 register_control_dependent_expression(ops[1]);
12268 break;
12269 }
12270
12271 case OpSubgroupAnyKHR:
12272 {
12273 uint32_t result_type = ops[0];
12274 uint32_t id = ops[1];
12275 emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
12276
12277 require_extension_internal("GL_ARB_shader_group_vote");
12278 register_control_dependent_expression(ops[1]);
12279 break;
12280 }
12281
12282 case OpSubgroupAllEqualKHR:
12283 {
12284 uint32_t result_type = ops[0];
12285 uint32_t id = ops[1];
12286 emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
12287
12288 require_extension_internal("GL_ARB_shader_group_vote");
12289 register_control_dependent_expression(ops[1]);
12290 break;
12291 }
12292
12293 case OpGroupIAddNonUniformAMD:
12294 case OpGroupFAddNonUniformAMD:
12295 {
12296 uint32_t result_type = ops[0];
12297 uint32_t id = ops[1];
12298 emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
12299
12300 require_extension_internal("GL_AMD_shader_ballot");
12301 register_control_dependent_expression(ops[1]);
12302 break;
12303 }
12304
12305 case OpGroupFMinNonUniformAMD:
12306 case OpGroupUMinNonUniformAMD:
12307 case OpGroupSMinNonUniformAMD:
12308 {
12309 uint32_t result_type = ops[0];
12310 uint32_t id = ops[1];
12311 emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
12312
12313 require_extension_internal("GL_AMD_shader_ballot");
12314 register_control_dependent_expression(ops[1]);
12315 break;
12316 }
12317
12318 case OpGroupFMaxNonUniformAMD:
12319 case OpGroupUMaxNonUniformAMD:
12320 case OpGroupSMaxNonUniformAMD:
12321 {
12322 uint32_t result_type = ops[0];
12323 uint32_t id = ops[1];
12324 emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
12325
12326 require_extension_internal("GL_AMD_shader_ballot");
12327 register_control_dependent_expression(ops[1]);
12328 break;
12329 }
12330
12331 case OpFragmentMaskFetchAMD:
12332 {
12333 auto &type = expression_type(ops[2]);
12334 uint32_t result_type = ops[0];
12335 uint32_t id = ops[1];
12336
12337 if (type.image.dim == spv::DimSubpassData)
12338 {
12339 emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
12340 }
12341 else
12342 {
12343 emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
12344 }
12345
12346 require_extension_internal("GL_AMD_shader_fragment_mask");
12347 break;
12348 }
12349
12350 case OpFragmentFetchAMD:
12351 {
12352 auto &type = expression_type(ops[2]);
12353 uint32_t result_type = ops[0];
12354 uint32_t id = ops[1];
12355
12356 if (type.image.dim == spv::DimSubpassData)
12357 {
12358 emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
12359 }
12360 else
12361 {
12362 emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
12363 }
12364
12365 require_extension_internal("GL_AMD_shader_fragment_mask");
12366 break;
12367 }
12368
12369 // Vulkan 1.1 sub-group stuff ...
12370 case OpGroupNonUniformElect:
12371 case OpGroupNonUniformBroadcast:
12372 case OpGroupNonUniformBroadcastFirst:
12373 case OpGroupNonUniformBallot:
12374 case OpGroupNonUniformInverseBallot:
12375 case OpGroupNonUniformBallotBitExtract:
12376 case OpGroupNonUniformBallotBitCount:
12377 case OpGroupNonUniformBallotFindLSB:
12378 case OpGroupNonUniformBallotFindMSB:
12379 case OpGroupNonUniformShuffle:
12380 case OpGroupNonUniformShuffleXor:
12381 case OpGroupNonUniformShuffleUp:
12382 case OpGroupNonUniformShuffleDown:
12383 case OpGroupNonUniformAll:
12384 case OpGroupNonUniformAny:
12385 case OpGroupNonUniformAllEqual:
12386 case OpGroupNonUniformFAdd:
12387 case OpGroupNonUniformIAdd:
12388 case OpGroupNonUniformFMul:
12389 case OpGroupNonUniformIMul:
12390 case OpGroupNonUniformFMin:
12391 case OpGroupNonUniformFMax:
12392 case OpGroupNonUniformSMin:
12393 case OpGroupNonUniformSMax:
12394 case OpGroupNonUniformUMin:
12395 case OpGroupNonUniformUMax:
12396 case OpGroupNonUniformBitwiseAnd:
12397 case OpGroupNonUniformBitwiseOr:
12398 case OpGroupNonUniformBitwiseXor:
12399 case OpGroupNonUniformLogicalAnd:
12400 case OpGroupNonUniformLogicalOr:
12401 case OpGroupNonUniformLogicalXor:
12402 case OpGroupNonUniformQuadSwap:
12403 case OpGroupNonUniformQuadBroadcast:
12404 emit_subgroup_op(instruction);
12405 break;
12406
12407 case OpFUnordEqual:
12408 case OpFUnordNotEqual:
12409 case OpFUnordLessThan:
12410 case OpFUnordGreaterThan:
12411 case OpFUnordLessThanEqual:
12412 case OpFUnordGreaterThanEqual:
12413 {
12414 // GLSL doesn't specify if floating point comparisons are ordered or unordered,
12415 // but glslang always emits ordered floating point compares for GLSL.
12416 // To get unordered compares, we can test the opposite thing and invert the result.
12417 // This way, we force true when there is any NaN present.
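		// e.g. OpFUnordLessThan a b becomes "!(a >= b)" (scalar) or "not(greaterThanEqual(a, b))"
		// (vector), which evaluates to true whenever either operand is NaN.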
12418 uint32_t op0 = ops[2];
12419 uint32_t op1 = ops[3];
12420
12421 string expr;
12422 if (expression_type(op0).vecsize > 1)
12423 {
12424 const char *comp_op = nullptr;
12425 switch (opcode)
12426 {
12427 case OpFUnordEqual:
12428 comp_op = "notEqual";
12429 break;
12430
12431 case OpFUnordNotEqual:
12432 comp_op = "equal";
12433 break;
12434
12435 case OpFUnordLessThan:
12436 comp_op = "greaterThanEqual";
12437 break;
12438
12439 case OpFUnordLessThanEqual:
12440 comp_op = "greaterThan";
12441 break;
12442
12443 case OpFUnordGreaterThan:
12444 comp_op = "lessThanEqual";
12445 break;
12446
12447 case OpFUnordGreaterThanEqual:
12448 comp_op = "lessThan";
12449 break;
12450
12451 default:
12452 assert(0);
12453 break;
12454 }
12455
12456 expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
12457 }
12458 else
12459 {
12460 const char *comp_op = nullptr;
12461 switch (opcode)
12462 {
12463 case OpFUnordEqual:
12464 comp_op = " != ";
12465 break;
12466
12467 case OpFUnordNotEqual:
12468 comp_op = " == ";
12469 break;
12470
12471 case OpFUnordLessThan:
12472 comp_op = " >= ";
12473 break;
12474
12475 case OpFUnordLessThanEqual:
12476 comp_op = " > ";
12477 break;
12478
12479 case OpFUnordGreaterThan:
12480 comp_op = " <= ";
12481 break;
12482
12483 case OpFUnordGreaterThanEqual:
12484 comp_op = " < ";
12485 break;
12486
12487 default:
12488 assert(0);
12489 break;
12490 }
12491
12492 expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
12493 }
12494
12495 emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
12496 inherit_expression_dependencies(ops[1], op0);
12497 inherit_expression_dependencies(ops[1], op1);
12498 break;
12499 }
12500
12501 case OpReportIntersectionKHR:
12502 // NV is same opcode.
12503 forced_temporaries.insert(ops[1]);
12504 if (ray_tracing_is_khr)
12505 GLSL_BFOP(reportIntersectionEXT);
12506 else
12507 GLSL_BFOP(reportIntersectionNV);
12508 flush_control_dependent_expressions(current_emitting_block->self);
12509 break;
12510 case OpIgnoreIntersectionNV:
12511 // KHR variant is a terminator.
12512 statement("ignoreIntersectionNV();");
12513 flush_control_dependent_expressions(current_emitting_block->self);
12514 break;
12515 case OpTerminateRayNV:
12516 // KHR variant is a terminator.
12517 statement("terminateRayNV();");
12518 flush_control_dependent_expressions(current_emitting_block->self);
12519 break;
12520 case OpTraceNV:
12521 statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12522 to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12523 to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12524 to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
12525 flush_control_dependent_expressions(current_emitting_block->self);
12526 break;
12527 case OpTraceRayKHR:
12528 if (!has_decoration(ops[10], DecorationLocation))
12529 SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
12530 statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12531 to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12532 to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12533 to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
12534 flush_control_dependent_expressions(current_emitting_block->self);
12535 break;
12536 case OpExecuteCallableNV:
12537 statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
12538 flush_control_dependent_expressions(current_emitting_block->self);
12539 break;
12540 case OpExecuteCallableKHR:
12541 if (!has_decoration(ops[1], DecorationLocation))
12542 SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
12543 statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
12544 flush_control_dependent_expressions(current_emitting_block->self);
12545 break;
12546
12547 // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
12548 case OpRayQueryInitializeKHR:
12549 flush_variable_declaration(ops[0]);
12550 statement("rayQueryInitializeEXT(",
12551 to_expression(ops[0]), ", ", to_expression(ops[1]), ", ",
12552 to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
12553 to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12554 to_expression(ops[6]), ", ", to_expression(ops[7]), ");");
12555 break;
12556 case OpRayQueryProceedKHR:
12557 flush_variable_declaration(ops[0]);
12558 emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false);
12559 break;
12560 case OpRayQueryTerminateKHR:
12561 flush_variable_declaration(ops[0]);
12562 statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");");
12563 break;
12564 case OpRayQueryGenerateIntersectionKHR:
12565 flush_variable_declaration(ops[0]);
12566 statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
12567 break;
12568 case OpRayQueryConfirmIntersectionKHR:
12569 flush_variable_declaration(ops[0]);
12570 statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");");
12571 break;
12572 #define GLSL_RAY_QUERY_GET_OP(op) \
12573 case OpRayQueryGet##op##KHR: \
12574 flush_variable_declaration(ops[2]); \
12575 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \
12576 break
12577 #define GLSL_RAY_QUERY_GET_OP2(op) \
12578 case OpRayQueryGet##op##KHR: \
12579 flush_variable_declaration(ops[2]); \
12580 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \
12581 break
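	// e.g. GLSL_RAY_QUERY_GET_OP2(IntersectionT) expands to a case emitting roughly:
	//   rayQueryGetIntersectionTEXT(q, bool(committed))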
12582 GLSL_RAY_QUERY_GET_OP(RayTMin);
12583 GLSL_RAY_QUERY_GET_OP(RayFlags);
12584 GLSL_RAY_QUERY_GET_OP(WorldRayOrigin);
12585 GLSL_RAY_QUERY_GET_OP(WorldRayDirection);
12586 GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque);
12587 GLSL_RAY_QUERY_GET_OP2(IntersectionType);
12588 GLSL_RAY_QUERY_GET_OP2(IntersectionT);
12589 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex);
12590 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId);
12591 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset);
12592 GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex);
12593 GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex);
12594 GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics);
12595 GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace);
12596 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection);
12597 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin);
12598 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld);
12599 GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject);
12600 #undef GLSL_RAY_QUERY_GET_OP
12601 #undef GLSL_RAY_QUERY_GET_OP2
12602
12603 case OpConvertUToAccelerationStructureKHR:
12604 require_extension_internal("GL_EXT_ray_tracing");
12605 GLSL_UFOP(accelerationStructureEXT);
12606 break;
12607
12608 case OpConvertUToPtr:
12609 {
12610 auto &type = get<SPIRType>(ops[0]);
12611 if (type.storage != StorageClassPhysicalStorageBufferEXT)
12612 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
12613
12614 auto op = type_to_glsl(type);
12615 emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12616 break;
12617 }
12618
12619 case OpConvertPtrToU:
12620 {
12621 auto &type = get<SPIRType>(ops[0]);
12622 auto &ptr_type = expression_type(ops[2]);
12623 if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
12624 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
12625
12626 auto op = type_to_glsl(type);
12627 emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12628 break;
12629 }
12630
12631 case OpUndef:
12632 // Undefined value has been declared.
12633 break;
12634
12635 case OpLine:
12636 {
12637 emit_line_directive(ops[0], ops[1]);
12638 break;
12639 }
12640
12641 case OpNoLine:
12642 break;
12643
12644 case OpDemoteToHelperInvocationEXT:
12645 if (!options.vulkan_semantics)
12646 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12647 require_extension_internal("GL_EXT_demote_to_helper_invocation");
12648 statement(backend.demote_literal, ";");
12649 break;
12650
12651 case OpIsHelperInvocationEXT:
12652 if (!options.vulkan_semantics)
12653 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12654 require_extension_internal("GL_EXT_demote_to_helper_invocation");
12655 emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
12656 break;
12657
12658 case OpBeginInvocationInterlockEXT:
12659 // If the interlock is complex, we emit this elsewhere.
12660 if (!interlocked_is_complex)
12661 {
12662 if (options.es)
12663 statement("beginInvocationInterlockNV();");
12664 else
12665 statement("beginInvocationInterlockARB();");
12666
12667 flush_all_active_variables();
12668 // Make sure forwarding doesn't propagate outside interlock region.
12669 }
12670 break;
12671
12672 case OpEndInvocationInterlockEXT:
12673 // If the interlock is complex, we emit this elsewhere.
12674 if (!interlocked_is_complex)
12675 {
12676 if (options.es)
12677 statement("endInvocationInterlockNV();");
12678 else
12679 statement("endInvocationInterlockARB();");
12680
12681 flush_all_active_variables();
12682 // Make sure forwarding doesn't propagate outside interlock region.
12683 }
12684 break;
12685
12686 default:
12687 statement("// unimplemented op ", instruction.op);
12688 break;
12689 }
12690 }
12691
12692 // Appends function arguments, mapped from global variables, beyond the specified arg index.
12693 // This is used when a function call uses fewer arguments than the function defines.
12694 // This situation may occur if the function signature has been dynamically modified to
12695 // extract global variables referenced from within the function, and convert them to
12696 // function arguments. This is necessary for shader languages that do not support global
12697 // access to shader input content from within a function (e.g. Metal). Each additional
12698 // function arg uses the name of the global variable. Function nesting will modify the
12699 // functions and function calls all the way up the nesting chain.
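// Hypothetical illustration: if foo() was rewritten to take the global vColor as a parameter,
// a call the SPIR-V still encodes as foo() gets "vColor" appended here so the emitted call
// reads foo(vColor).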
12700 void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
12701 {
12702 auto &args = func.arguments;
12703 uint32_t arg_cnt = uint32_t(args.size());
12704 for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
12705 {
12706 auto &arg = args[arg_idx];
12707 assert(arg.alias_global_variable);
12708
12709 // If the underlying variable needs to be declared
12710 // (i.e. a local variable with deferred declaration), do so now.
12711 uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
12712 if (var_id)
12713 flush_variable_declaration(var_id);
12714
12715 arglist.push_back(to_func_call_arg(arg, arg.id));
12716 }
12717 }
12718
12719 string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
12720 {
12721 if (type.type_alias != TypeID(0) &&
12722 !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
12723 {
12724 return to_member_name(get<SPIRType>(type.type_alias), index);
12725 }
12726
12727 auto &memb = ir.meta[type.self].members;
12728 if (index < memb.size() && !memb[index].alias.empty())
12729 return memb[index].alias;
12730 else
12731 return join("_m", index);
12732 }
12733
12734 string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
12735 {
12736 return join(".", to_member_name(type, index));
12737 }
12738
12739 string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
12740 {
12741 string ret;
12742 auto *member_type = &type;
12743 for (auto &index : indices)
12744 {
12745 ret += join(".", to_member_name(*member_type, index));
12746 member_type = &get<SPIRType>(member_type->member_types[index]);
12747 }
12748 return ret;
12749 }
12750
12751 void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
12752 {
12753 auto &memb = ir.meta[type.self].members;
12754 if (index < memb.size() && !memb[index].alias.empty())
12755 {
12756 auto &name = memb[index].alias;
12757 if (name.empty())
12758 return;
12759
12760 ParsedIR::sanitize_identifier(name, true, true);
12761 update_name_cache(type.member_name_cache, name);
12762 }
12763 }
12764
12765 // Checks whether the ID is a row_major matrix that requires conversion before use
12766 bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
12767 {
12768 // Natively supported row-major matrices do not need to be converted.
12769 // Legacy targets do not support row major.
12770 if (backend.native_row_major_matrix && !is_legacy())
12771 return false;
12772
12773 auto *e = maybe_get<SPIRExpression>(id);
12774 if (e)
12775 return e->need_transpose;
12776 else
12777 return has_decoration(id, DecorationRowMajor);
12778 }
12779
12780 // Checks whether the member is a row_major matrix that requires conversion before use
12781 bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
12782 {
12783 // Natively supported row-major matrices do not need to be converted.
12784 if (backend.native_row_major_matrix && !is_legacy())
12785 return false;
12786
12787 // Non-matrix or column-major matrix types do not need to be converted.
12788 if (!has_member_decoration(type.self, index, DecorationRowMajor))
12789 return false;
12790
12791 // Only square row-major matrices can be converted at this time.
12792 // Converting non-square matrices will require defining a custom GLSL function that
12793 // swaps matrix elements while retaining the original dimensional form of the matrix.
12794 const auto mbr_type = get<SPIRType>(type.member_types[index]);
12795 if (mbr_type.columns != mbr_type.vecsize)
12796 SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
12797
12798 return true;
12799 }
12800
12801 // Checks if we need to remap physical type IDs when declaring the type in a buffer.
12802 bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
12803 {
12804 return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
12805 }
12806
12807 // Checks whether the member is a packed data type that might need to be unpacked.
12808 bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
12809 {
12810 return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
12811 }
12812
12813 // Wraps the expression string in a function call that converts the
12814 // row_major matrix result of the expression to a column_major matrix.
12815 // Base implementation uses the standard library transpose() function.
12816 // Subclasses may override to use a different function.
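// For illustration: a full matrix simply becomes transpose(expr) (spvTranspose(expr) on
// pre-1.20 targets), while loading one column such as "m[1]" from a row-major mat3 is
// unrolled into a constructor along the lines of vec3(m[0][1], m[1][1], m[2][1]), since the
// logical column is spread across the rows of the stored matrix.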
12817 string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
12818 bool /*is_packed*/)
12819 {
12820 strip_enclosed_expression(exp_str);
12821 if (!is_matrix(exp_type))
12822 {
12823 auto column_index = exp_str.find_last_of('[');
12824 if (column_index == string::npos)
12825 return exp_str;
12826
12827 auto column_expr = exp_str.substr(column_index);
12828 exp_str.resize(column_index);
12829
12830 auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
12831
12832 // Loading a column from a row-major matrix. Unroll the load.
12833 for (uint32_t c = 0; c < exp_type.vecsize; c++)
12834 {
12835 transposed_expr += join(exp_str, '[', c, ']', column_expr);
12836 if (c + 1 < exp_type.vecsize)
12837 transposed_expr += ", ";
12838 }
12839
12840 transposed_expr += ")";
12841 return transposed_expr;
12842 }
12843 else if (options.version < 120)
12844 {
12845 // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
12846 // these GLSL versions do not support non-square matrices.
12847 if (exp_type.vecsize == 2 && exp_type.columns == 2)
12848 {
12849 if (!requires_transpose_2x2)
12850 {
12851 requires_transpose_2x2 = true;
12852 force_recompile();
12853 }
12854 }
12855 else if (exp_type.vecsize == 3 && exp_type.columns == 3)
12856 {
12857 if (!requires_transpose_3x3)
12858 {
12859 requires_transpose_3x3 = true;
12860 force_recompile();
12861 }
12862 }
12863 else if (exp_type.vecsize == 4 && exp_type.columns == 4)
12864 {
12865 if (!requires_transpose_4x4)
12866 {
12867 requires_transpose_4x4 = true;
12868 force_recompile();
12869 }
12870 }
12871 else
12872 SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
12873 return join("spvTranspose(", exp_str, ")");
12874 }
12875 else
12876 return join("transpose(", exp_str, ")");
12877 }
12878
12879 string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
12880 {
12881 string type_name = type_to_glsl(type, id);
12882 remap_variable_type_name(type, name, type_name);
12883 return join(type_name, " ", name, type_to_array_glsl(type));
12884 }
12885
12886 bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
12887 {
12888 return var.storage == storage;
12889 }
12890
12891 // Emit a structure member. Subclasses may override to modify output,
12892 // or to dynamically add a padding member if needed.
12893 void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
12894 const string &qualifier, uint32_t)
12895 {
12896 auto &membertype = get<SPIRType>(member_type_id);
12897
12898 Bitset memberflags;
12899 auto &memb = ir.meta[type.self].members;
12900 if (index < memb.size())
12901 memberflags = memb[index].decoration_flags;
12902
12903 string qualifiers;
12904 bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
12905 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
12906
12907 if (is_block)
12908 qualifiers = to_interpolation_qualifiers(memberflags);
12909
12910 statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
12911 variable_decl(membertype, to_member_name(type, index)), ";");
12912 }
12913
12914 void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
12915 {
12916 }
12917
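// Produces the leading qualifiers (precise / precision) for a type with the given decorations.
// A rough sketch of the ESSL logic below: a RelaxedPrecision float in a fragment shader whose
// default float precision is already mediump gets no explicit qualifier, while a full-precision
// int in that shader is emitted as "highp" unless highp is already implied by the defaults.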
12918 string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
12919 {
12920 // GL_EXT_buffer_reference variables can be marked as restrict.
12921 if (flags.get(DecorationRestrictPointerEXT))
12922 return "restrict ";
12923
12924 string qual;
12925
12926 if (type_is_floating_point(type) && flags.get(DecorationNoContraction) && backend.support_precise_qualifier)
12927 qual = "precise ";
12928
12929 // Structs do not have precision qualifiers, nor do doubles (desktop-only anyway, so no mediump/highp).
12930 bool type_supports_precision =
12931 type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
12932 type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
12933 type.basetype == SPIRType::Sampler;
12934
12935 if (!type_supports_precision)
12936 return qual;
12937
12938 if (options.es)
12939 {
12940 auto &execution = get_entry_point();
12941
12942 if (flags.get(DecorationRelaxedPrecision))
12943 {
12944 bool implied_fmediump = type.basetype == SPIRType::Float &&
12945 options.fragment.default_float_precision == Options::Mediump &&
12946 execution.model == ExecutionModelFragment;
12947
12948 bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
12949 options.fragment.default_int_precision == Options::Mediump &&
12950 execution.model == ExecutionModelFragment;
12951
12952 qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
12953 }
12954 else
12955 {
12956 bool implied_fhighp =
12957 type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
12958 execution.model == ExecutionModelFragment) ||
12959 (execution.model != ExecutionModelFragment));
12960
12961 bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
12962 ((options.fragment.default_int_precision == Options::Highp &&
12963 execution.model == ExecutionModelFragment) ||
12964 (execution.model != ExecutionModelFragment));
12965
12966 qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
12967 }
12968 }
12969 else if (backend.allow_precision_qualifiers)
12970 {
12971 // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
12972 // The default is highp however, so only emit mediump in the rare case that a shader has these.
12973 if (flags.get(DecorationRelaxedPrecision))
12974 qual += "mediump ";
12975 }
12976
12977 return qual;
12978 }
12979
12980 string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
12981 {
12982 auto &type = expression_type(id);
12983 bool use_precision_qualifiers = backend.allow_precision_qualifiers;
12984 if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
12985 {
12986 // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
12987 auto &result_type = get<SPIRType>(type.image.type);
12988 if (result_type.width < 32)
12989 return "mediump ";
12990 }
12991 return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
12992 }
12993
12994 void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
12995 {
12996 // Works around weird behavior in glslangValidator where
12997 // a patch out block is translated so that only the block members receive the Patch decoration.
12998 // To keep glslang from complaining when the output is compiled again, we have to transform this back to a case where
12999 // the variable itself carries the Patch decoration, and not its members.
13000 auto &type = get<SPIRType>(var.basetype);
13001 if (has_decoration(type.self, DecorationBlock))
13002 {
13003 uint32_t member_count = uint32_t(type.member_types.size());
13004 for (uint32_t i = 0; i < member_count; i++)
13005 {
13006 if (has_member_decoration(type.self, i, DecorationPatch))
13007 {
13008 set_decoration(var.self, DecorationPatch);
13009 break;
13010 }
13011 }
13012
13013 if (has_decoration(var.self, DecorationPatch))
13014 for (uint32_t i = 0; i < member_count; i++)
13015 unset_member_decoration(type.self, i, DecorationPatch);
13016 }
13017 }
13018
13019 string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
13020 {
13021 auto &flags = ir.meta[id].decoration.decoration_flags;
13022 string res;
13023
13024 auto *var = maybe_get<SPIRVariable>(id);
13025
13026 if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
13027 res += "shared ";
13028
13029 res += to_interpolation_qualifiers(flags);
13030 if (var)
13031 res += to_storage_qualifiers_glsl(*var);
13032
13033 auto &type = expression_type(id);
13034 if (type.image.dim != DimSubpassData && type.image.sampled == 2)
13035 {
13036 if (flags.get(DecorationCoherent))
13037 res += "coherent ";
13038 if (flags.get(DecorationRestrict))
13039 res += "restrict ";
13040
13041 if (flags.get(DecorationNonWritable))
13042 res += "readonly ";
13043
13044 bool formatted_load = type.image.format == ImageFormatUnknown;
13045 if (flags.get(DecorationNonReadable))
13046 {
13047 res += "writeonly ";
13048 formatted_load = false;
13049 }
13050
13051 if (formatted_load)
13052 {
13053 if (!options.es)
13054 require_extension_internal("GL_EXT_shader_image_load_formatted");
13055 else
13056 SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
13057 }
13058 }
13059
13060 res += to_precision_qualifiers_glsl(id);
13061
13062 return res;
13063 }
13064
13065 string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
13066 {
13067 // glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
13068 auto &type = expression_type(arg.id);
13069 const char *direction = "";
13070
13071 if (type.pointer)
13072 {
13073 if (arg.write_count && arg.read_count)
13074 direction = "inout ";
13075 else if (arg.write_count)
13076 direction = "out ";
13077 }
13078
13079 return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
13080 }
13081
13082 string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
13083 {
13084 return to_expression(var.initializer);
13085 }
13086
13087 string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
13088 {
13089 #ifndef NDEBUG
13090 auto &type = get<SPIRType>(type_id);
13091 assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
13092 type.storage == StorageClassGeneric);
13093 #endif
13094 uint32_t id = ir.increase_bound_by(1);
13095 ir.make_constant_null(id, type_id, false);
13096 return constant_expression(get<SPIRConstant>(id));
13097 }
13098
13099 bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
13100 {
13101 if (type.pointer)
13102 return false;
13103
13104 if (!type.array.empty() && options.flatten_multidimensional_arrays)
13105 return false;
13106
13107 for (auto &literal : type.array_size_literal)
13108 if (!literal)
13109 return false;
13110
13111 for (auto &memb : type.member_types)
13112 if (!type_can_zero_initialize(get<SPIRType>(memb)))
13113 return false;
13114
13115 return true;
13116 }
13117
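// Builds a full variable declaration, including qualifiers and an initializer where one is
// present. If options.force_zero_initialized_variables is set and the type can be
// zero-initialized, a null constant is appended instead, so a plain int would come out
// roughly as "int foo = 0;" (illustrative name).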
13118 string CompilerGLSL::variable_decl(const SPIRVariable &variable)
13119 {
13120 // Ignore the pointer type since GLSL doesn't have pointers.
13121 auto &type = get_variable_data_type(variable);
13122
13123 if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer)
13124 SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
13125
13126 auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
13127
13128 if (variable.loop_variable && variable.static_expression)
13129 {
13130 uint32_t expr = variable.static_expression;
13131 if (ir.ids[expr].get_type() != TypeUndef)
13132 res += join(" = ", to_expression(variable.static_expression));
13133 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
13134 res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
13135 }
13136 else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
13137 {
13138 uint32_t expr = variable.initializer;
13139 if (ir.ids[expr].get_type() != TypeUndef)
13140 res += join(" = ", to_initializer_expression(variable));
13141 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
13142 res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
13143 }
13144
13145 return res;
13146 }
13147
13148 const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
13149 {
13150 auto &flags = ir.meta[variable.self].decoration.decoration_flags;
13151 if (flags.get(DecorationRelaxedPrecision))
13152 return "mediump ";
13153 else
13154 return "highp ";
13155 }
13156
13157 string CompilerGLSL::pls_decl(const PlsRemap &var)
13158 {
13159 auto &variable = get<SPIRVariable>(var.id);
13160
13161 SPIRType type;
13162 type.vecsize = pls_format_to_components(var.format);
13163 type.basetype = pls_format_to_basetype(var.format);
13164
13165 return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
13166 to_name(variable.self));
13167 }
13168
13169 uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
13170 {
13171 return to_array_size_literal(type, uint32_t(type.array.size() - 1));
13172 }
13173
13174 uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
13175 {
13176 assert(type.array.size() == type.array_size_literal.size());
13177
13178 if (type.array_size_literal[index])
13179 {
13180 return type.array[index];
13181 }
13182 else
13183 {
13184 // Use the default spec constant value.
13185 // This is the best we can do.
13186 return evaluate_constant_u32(type.array[index]);
13187 }
13188 }
13189
13190 string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
13191 {
13192 assert(type.array.size() == type.array_size_literal.size());
13193
13194 auto &size = type.array[index];
13195 if (!type.array_size_literal[index])
13196 return to_expression(size);
13197 else if (size)
13198 return convert_to_string(size);
13199 else if (!backend.unsized_array_supported)
13200 {
13201 // For runtime-sized arrays, we can work around
13202 // lack of standard support for this by simply having
13203 // a single element array.
13204 //
13205 // Runtime length arrays must always be the last element
13206 // in an interface block.
13207 return "1";
13208 }
13209 else
13210 return "";
13211 }
13212
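// Emits the trailing array part of a declaration. A two-dimensional array is either written in
// the usual "[N][M]" form, or, when options.flatten_multidimensional_arrays is set, collapsed
// into a single "[N * M]" dimension (with the corresponding index flattening handled where the
// accesses themselves are emitted).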
13213 string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
13214 {
13215 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
13216 {
13217 // We are using a wrapped pointer type, and we should not emit any array declarations here.
13218 return "";
13219 }
13220
13221 if (type.array.empty())
13222 return "";
13223
13224 if (options.flatten_multidimensional_arrays)
13225 {
13226 string res;
13227 res += "[";
13228 for (auto i = uint32_t(type.array.size()); i; i--)
13229 {
13230 res += enclose_expression(to_array_size(type, i - 1));
13231 if (i > 1)
13232 res += " * ";
13233 }
13234 res += "]";
13235 return res;
13236 }
13237 else
13238 {
13239 if (type.array.size() > 1)
13240 {
13241 if (!options.es && options.version < 430)
13242 require_extension_internal("GL_ARB_arrays_of_arrays");
13243 else if (options.es && options.version < 310)
13244 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
13245 "Try using --flatten-multidimensional-arrays or set "
13246 "options.flatten_multidimensional_arrays to true.");
13247 }
13248
13249 string res;
13250 for (auto i = uint32_t(type.array.size()); i; i--)
13251 {
13252 res += "[";
13253 res += to_array_size(type, i - 1);
13254 res += "]";
13255 }
13256 return res;
13257 }
13258 }
13259
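// Maps a SPIR-V image or sampler type onto a GLSL opaque type name. Illustrative results of
// the rules below: a sampled 2D float image becomes "sampler2D" (or "texture2D" as a separate
// image in Vulkan GLSL), a uint 2D storage image array becomes "uimage2DArray", and a
// depth-comparison combined sampler becomes "sampler2DShadow".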
13260 string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
13261 {
13262 auto &imagetype = get<SPIRType>(type.image.type);
13263 string res;
13264
13265 switch (imagetype.basetype)
13266 {
13267 case SPIRType::Int:
13268 case SPIRType::Short:
13269 case SPIRType::SByte:
13270 res = "i";
13271 break;
13272 case SPIRType::UInt:
13273 case SPIRType::UShort:
13274 case SPIRType::UByte:
13275 res = "u";
13276 break;
13277 default:
13278 break;
13279 }
13280
13281 // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
13282 // We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
13283
13284 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
13285 return res + "subpassInput" + (type.image.ms ? "MS" : "");
13286 else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
13287 subpass_input_is_framebuffer_fetch(id))
13288 {
13289 SPIRType sampled_type = get<SPIRType>(type.image.type);
13290 sampled_type.vecsize = 4;
13291 return type_to_glsl(sampled_type);
13292 }
13293
13294 // If we're emulating subpassInput with samplers, force sampler2D
13295 // so we don't have to specify format.
13296 if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
13297 {
13298 // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
13299 if (type.image.dim == DimBuffer && type.image.sampled == 1)
13300 res += "sampler";
13301 else
13302 res += type.image.sampled == 2 ? "image" : "texture";
13303 }
13304 else
13305 res += "sampler";
13306
13307 switch (type.image.dim)
13308 {
13309 case Dim1D:
13310 res += "1D";
13311 break;
13312 case Dim2D:
13313 res += "2D";
13314 break;
13315 case Dim3D:
13316 res += "3D";
13317 break;
13318 case DimCube:
13319 res += "Cube";
13320 break;
13321 case DimRect:
13322 if (options.es)
13323 SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
13324
13325 if (is_legacy_desktop())
13326 require_extension_internal("GL_ARB_texture_rectangle");
13327
13328 res += "2DRect";
13329 break;
13330
13331 case DimBuffer:
13332 if (options.es && options.version < 320)
13333 require_extension_internal("GL_OES_texture_buffer");
13334 else if (!options.es && options.version < 300)
13335 require_extension_internal("GL_EXT_texture_buffer_object");
13336 res += "Buffer";
13337 break;
13338
13339 case DimSubpassData:
13340 res += "2D";
13341 break;
13342 default:
13343 SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
13344 }
13345
13346 if (type.image.ms)
13347 res += "MS";
13348 if (type.image.arrayed)
13349 {
13350 if (is_legacy_desktop())
13351 require_extension_internal("GL_EXT_texture_array");
13352 res += "Array";
13353 }
13354
13355 // "Shadow" state in GLSL only exists for samplers and combined image samplers.
13356 if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
13357 image_is_comparison(type, id))
13358 {
13359 res += "Shadow";
13360 }
13361
13362 return res;
13363 }
13364
13365 string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
13366 {
13367 if (backend.use_array_constructor && type.array.size() > 1)
13368 {
13369 if (options.flatten_multidimensional_arrays)
13370 SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
13371 "e.g. float[][]().");
13372 else if (!options.es && options.version < 430)
13373 require_extension_internal("GL_ARB_arrays_of_arrays");
13374 else if (options.es && options.version < 310)
13375 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
13376 }
13377
13378 auto e = type_to_glsl(type);
13379 if (backend.use_array_constructor)
13380 {
13381 for (uint32_t i = 0; i < type.array.size(); i++)
13382 e += "[]";
13383 }
13384 return e;
13385 }
13386
13387 // The optional id parameter indicates the object whose type we are trying
13388 // to find the description for. Most type descriptions do not
13389 // depend on a specific object's use of that type.
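// For reference, the mapping below yields names such as "float", "uvec3" and "mat3", with
// non-square matrices written column-by-row as e.g. "mat4x3" (4 columns, 3 rows) and the
// usual b/i/u/i64/u64/f16/d prefixes applied per base type.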
13390 string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
13391 {
13392 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
13393 {
13394 // Need to create a magic type name which compacts the entire type information.
13395 string name = type_to_glsl(get_pointee_type(type));
13396 for (size_t i = 0; i < type.array.size(); i++)
13397 {
13398 if (type.array_size_literal[i])
13399 name += join(type.array[i], "_");
13400 else
13401 name += join("id", type.array[i], "_");
13402 }
13403 name += "Pointer";
13404 return name;
13405 }
13406
13407 switch (type.basetype)
13408 {
13409 case SPIRType::Struct:
13410 // Need OpName lookup here to get a "sensible" name for a struct.
13411 if (backend.explicit_struct_type)
13412 return join("struct ", to_name(type.self));
13413 else
13414 return to_name(type.self);
13415
13416 case SPIRType::Image:
13417 case SPIRType::SampledImage:
13418 return image_type_glsl(type, id);
13419
13420 case SPIRType::Sampler:
13421 // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
13422 // this distinction into the type system.
13423 return comparison_ids.count(id) ? "samplerShadow" : "sampler";
13424
13425 case SPIRType::AccelerationStructure:
13426 return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
13427
13428 case SPIRType::RayQuery:
13429 return "rayQueryEXT";
13430
13431 case SPIRType::Void:
13432 return "void";
13433
13434 default:
13435 break;
13436 }
13437
13438 if (type.basetype == SPIRType::UInt && is_legacy())
13439 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
13440
13441 if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
13442 {
13443 switch (type.basetype)
13444 {
13445 case SPIRType::Boolean:
13446 return "bool";
13447 case SPIRType::SByte:
13448 return backend.basic_int8_type;
13449 case SPIRType::UByte:
13450 return backend.basic_uint8_type;
13451 case SPIRType::Short:
13452 return backend.basic_int16_type;
13453 case SPIRType::UShort:
13454 return backend.basic_uint16_type;
13455 case SPIRType::Int:
13456 return backend.basic_int_type;
13457 case SPIRType::UInt:
13458 return backend.basic_uint_type;
13459 case SPIRType::AtomicCounter:
13460 return "atomic_uint";
13461 case SPIRType::Half:
13462 return "float16_t";
13463 case SPIRType::Float:
13464 return "float";
13465 case SPIRType::Double:
13466 return "double";
13467 case SPIRType::Int64:
13468 return "int64_t";
13469 case SPIRType::UInt64:
13470 return "uint64_t";
13471 default:
13472 return "???";
13473 }
13474 }
13475 else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
13476 {
13477 switch (type.basetype)
13478 {
13479 case SPIRType::Boolean:
13480 return join("bvec", type.vecsize);
13481 case SPIRType::SByte:
13482 return join("i8vec", type.vecsize);
13483 case SPIRType::UByte:
13484 return join("u8vec", type.vecsize);
13485 case SPIRType::Short:
13486 return join("i16vec", type.vecsize);
13487 case SPIRType::UShort:
13488 return join("u16vec", type.vecsize);
13489 case SPIRType::Int:
13490 return join("ivec", type.vecsize);
13491 case SPIRType::UInt:
13492 return join("uvec", type.vecsize);
13493 case SPIRType::Half:
13494 return join("f16vec", type.vecsize);
13495 case SPIRType::Float:
13496 return join("vec", type.vecsize);
13497 case SPIRType::Double:
13498 return join("dvec", type.vecsize);
13499 case SPIRType::Int64:
13500 return join("i64vec", type.vecsize);
13501 case SPIRType::UInt64:
13502 return join("u64vec", type.vecsize);
13503 default:
13504 return "???";
13505 }
13506 }
13507 else if (type.vecsize == type.columns) // Simple Matrix builtin
13508 {
13509 switch (type.basetype)
13510 {
13511 case SPIRType::Boolean:
13512 return join("bmat", type.vecsize);
13513 case SPIRType::Int:
13514 return join("imat", type.vecsize);
13515 case SPIRType::UInt:
13516 return join("umat", type.vecsize);
13517 case SPIRType::Half:
13518 return join("f16mat", type.vecsize);
13519 case SPIRType::Float:
13520 return join("mat", type.vecsize);
13521 case SPIRType::Double:
13522 return join("dmat", type.vecsize);
13523 // Matrix types not supported for int64/uint64.
13524 default:
13525 return "???";
13526 }
13527 }
13528 else
13529 {
13530 switch (type.basetype)
13531 {
13532 case SPIRType::Boolean:
13533 return join("bmat", type.columns, "x", type.vecsize);
13534 case SPIRType::Int:
13535 return join("imat", type.columns, "x", type.vecsize);
13536 case SPIRType::UInt:
13537 return join("umat", type.columns, "x", type.vecsize);
13538 case SPIRType::Half:
13539 return join("f16mat", type.columns, "x", type.vecsize);
13540 case SPIRType::Float:
13541 return join("mat", type.columns, "x", type.vecsize);
13542 case SPIRType::Double:
13543 return join("dmat", type.columns, "x", type.vecsize);
13544 // Matrix types not supported for int64/uint64.
13545 default:
13546 return "???";
13547 }
13548 }
13549 }
13550
13551 void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
13552 const unordered_set<string> &variables_secondary, string &name)
13553 {
13554 if (name.empty())
13555 return;
13556
13557 ParsedIR::sanitize_underscores(name);
13558 if (ParsedIR::is_globally_reserved_identifier(name, true))
13559 {
13560 name.clear();
13561 return;
13562 }
13563
13564 update_name_cache(variables_primary, variables_secondary, name);
13565 }
13566
13567 void CompilerGLSL::add_local_variable_name(uint32_t id)
13568 {
13569 add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
13570 }
13571
13572 void CompilerGLSL::add_resource_name(uint32_t id)
13573 {
13574 add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
13575 }
13576
13577 void CompilerGLSL::add_header_line(const std::string &line)
13578 {
13579 header_lines.push_back(line);
13580 }
13581
13582 bool CompilerGLSL::has_extension(const std::string &ext) const
13583 {
13584 auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
13585 return itr != end(forced_extensions);
13586 }
13587
13588 void CompilerGLSL::require_extension(const std::string &ext)
13589 {
13590 if (!has_extension(ext))
13591 forced_extensions.push_back(ext);
13592 }
13593
13594 void CompilerGLSL::require_extension_internal(const string &ext)
13595 {
13596 if (backend.supports_extensions && !has_extension(ext))
13597 {
13598 forced_extensions.push_back(ext);
13599 force_recompile();
13600 }
13601 }
13602
13603 void CompilerGLSL::flatten_buffer_block(VariableID id)
13604 {
13605 auto &var = get<SPIRVariable>(id);
13606 auto &type = get<SPIRType>(var.basetype);
13607 auto name = to_name(type.self, false);
13608 auto &flags = ir.meta[type.self].decoration.decoration_flags;
13609
13610 if (!type.array.empty())
13611 SPIRV_CROSS_THROW(name + " is an array of UBOs.");
13612 if (type.basetype != SPIRType::Struct)
13613 SPIRV_CROSS_THROW(name + " is not a struct.");
13614 if (!flags.get(DecorationBlock))
13615 SPIRV_CROSS_THROW(name + " is not a block.");
13616 if (type.member_types.empty())
13617 SPIRV_CROSS_THROW(name + " is an empty struct.");
13618
13619 flattened_buffer_blocks.insert(id);
13620 }
13621
13622 bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
13623 {
13624 return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
13625 }
13626
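// Returns true if the operand is an image in StorageClassImage, i.e. the target of an image
// atomic. On older ESSL this pulls in GL_OES_shader_image_atomic, and any readonly/writeonly
// qualifiers on the backing variable are dropped (forcing a recompile), since image atomics
// both read and write the image.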
13627 bool CompilerGLSL::check_atomic_image(uint32_t id)
13628 {
13629 auto &type = expression_type(id);
13630 if (type.storage == StorageClassImage)
13631 {
13632 if (options.es && options.version < 320)
13633 require_extension_internal("GL_OES_shader_image_atomic");
13634
13635 auto *var = maybe_get_backing_variable(id);
13636 if (var)
13637 {
13638 auto &flags = ir.meta[var->self].decoration.decoration_flags;
13639 if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
13640 {
13641 flags.clear(DecorationNonWritable);
13642 flags.clear(DecorationNonReadable);
13643 force_recompile();
13644 }
13645 }
13646 return true;
13647 }
13648 else
13649 return false;
13650 }
13651
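// Tracks function overloads by hashing the (pointer-stripped) argument types of each emitted
// function. GLSL overload resolution cannot tell apart two SPIR-V functions that share a name
// and whose arguments differ only in ways that are erased here (pointer-ness, or image/sampler
// arguments when combined image samplers are in use), so such a clash is resolved by renaming
// the later function via add_resource_name().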
13652 void CompilerGLSL::add_function_overload(const SPIRFunction &func)
13653 {
13654 Hasher hasher;
13655 for (auto &arg : func.arguments)
13656 {
13657 // Parameters can vary with pointer type or not,
13658 // but that will not change the signature in GLSL/HLSL,
13659 // so strip the pointer type before hashing.
13660 uint32_t type_id = get_pointee_type_id(arg.type);
13661 auto &type = get<SPIRType>(type_id);
13662
13663 if (!combined_image_samplers.empty())
13664 {
13665 // If we have combined image samplers, we cannot really trust the image and sampler arguments
13666 // we pass down to callees, because they may be shuffled around.
13667 // Ignore these arguments, to make sure that functions need to differ in some other way
13668 // to be considered different overloads.
13669 if (type.basetype == SPIRType::SampledImage ||
13670 (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
13671 {
13672 continue;
13673 }
13674 }
13675
13676 hasher.u32(type_id);
13677 }
13678 uint64_t types_hash = hasher.get();
13679
13680 auto function_name = to_name(func.self);
13681 auto itr = function_overloads.find(function_name);
13682 if (itr != end(function_overloads))
13683 {
13684 // There exists a function with this name already.
13685 auto &overloads = itr->second;
13686 if (overloads.count(types_hash) != 0)
13687 {
13688 // Overload conflict, assign a new name.
13689 add_resource_name(func.self);
13690 function_overloads[to_name(func.self)].insert(types_hash);
13691 }
13692 else
13693 {
13694 // Can reuse the name.
13695 overloads.insert(types_hash);
13696 }
13697 }
13698 else
13699 {
13700 // First time we see this function name.
13701 add_resource_name(func.self);
13702 function_overloads[to_name(func.self)].insert(types_hash);
13703 }
13704 }
13705
13706 void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
13707 {
13708 if (func.self != ir.default_entry_point)
13709 add_function_overload(func);
13710
13711 // Avoid shadow declarations.
13712 local_variable_names = resource_names;
13713
13714 string decl;
13715
13716 auto &type = get<SPIRType>(func.return_type);
13717 decl += flags_to_qualifiers_glsl(type, return_flags);
13718 decl += type_to_glsl(type);
13719 decl += type_to_array_glsl(type);
13720 decl += " ";
13721
13722 if (func.self == ir.default_entry_point)
13723 {
13724 // If we need complex fallback in GLSL, we just wrap main() in a function
13725 // and interlock the entire shader ...
13726 if (interlocked_is_complex)
13727 decl += "spvMainInterlockedBody";
13728 else
13729 decl += "main";
13730
13731 processing_entry_point = true;
13732 }
13733 else
13734 decl += to_name(func.self);
13735
13736 decl += "(";
13737 SmallVector<string> arglist;
13738 for (auto &arg : func.arguments)
13739 {
13740 // Do not pass in separate images or samplers if we're remapping
13741 // to combined image samplers.
13742 if (skip_argument(arg.id))
13743 continue;
13744
13745 // Might change the variable name if it already exists in this function.
13746 // SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
13747 // to use the same name for different variables.
13748 // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
13749 add_local_variable_name(arg.id);
13750
13751 arglist.push_back(argument_decl(arg));
13752
13753 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
13754 auto *var = maybe_get<SPIRVariable>(arg.id);
13755 if (var)
13756 var->parameter = &arg;
13757 }
13758
13759 for (auto &arg : func.shadow_arguments)
13760 {
13761 // Might change the variable name if it already exists in this function.
13762 // SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
13763 // to use the same name for different variables.
13764 // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
13765 add_local_variable_name(arg.id);
13766
13767 arglist.push_back(argument_decl(arg));
13768
13769 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
13770 auto *var = maybe_get<SPIRVariable>(arg.id);
13771 if (var)
13772 var->parameter = &arg;
13773 }
13774
13775 decl += merge(arglist);
13776 decl += ")";
13777 statement(decl);
13778 }
13779
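// Emits a full function definition. Any callee reached through OpFunctionCall is emitted
// first (depth-first, with func.active guarding against cycles), followed by the prototype,
// local-variable setup (deferred declarations, loop variables, static expressions) and the
// body's block chain.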
13780 void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
13781 {
13782 // Avoid potential cycles.
13783 if (func.active)
13784 return;
13785 func.active = true;
13786
13787 // If we depend on a function, emit that function before we emit our own function.
13788 for (auto block : func.blocks)
13789 {
13790 auto &b = get<SPIRBlock>(block);
13791 for (auto &i : b.ops)
13792 {
13793 auto ops = stream(i);
13794 auto op = static_cast<Op>(i.op);
13795
13796 if (op == OpFunctionCall)
13797 {
13798 // Recursively emit functions which are called.
13799 uint32_t id = ops[2];
13800 emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
13801 }
13802 }
13803 }
13804
13805 if (func.entry_line.file_id != 0)
13806 emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
13807 emit_function_prototype(func, return_flags);
13808 begin_scope();
13809
13810 if (func.self == ir.default_entry_point)
13811 emit_entry_point_declarations();
13812
13813 current_function = &func;
13814 auto &entry_block = get<SPIRBlock>(func.entry_block);
13815
13816 sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
13817 for (auto &array : func.constant_arrays_needed_on_stack)
13818 {
13819 auto &c = get<SPIRConstant>(array);
13820 auto &type = get<SPIRType>(c.constant_type);
13821 statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
13822 }
13823
13824 for (auto &v : func.local_variables)
13825 {
13826 auto &var = get<SPIRVariable>(v);
13827 var.deferred_declaration = false;
13828
13829 if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup))
13830 {
13831 // Special variable types which cannot have an initializer
13832 // need to be declared as standalone variables.
13833 // Comes from MSL which can push global variables as local variables in main function.
13834 add_local_variable_name(var.self);
13835 statement(variable_decl(var), ";");
13836 var.deferred_declaration = false;
13837 }
13838 else if (var.storage == StorageClassPrivate)
13839 {
13840 // These variables will not have had their CFG usage analyzed, so move it to the entry block.
13841 // Comes from MSL which can push global variables as local variables in main function.
13842 // We could just declare them right now, but we would miss out on an important initialization case which is
13843 // LUT declaration in MSL.
13844 // If we don't declare the variable when it is assigned we're forced to go through a helper function
13845 // which copies elements one by one.
13846 add_local_variable_name(var.self);
13847
13848 if (var.initializer)
13849 {
13850 statement(variable_decl(var), ";");
13851 var.deferred_declaration = false;
13852 }
13853 else
13854 {
13855 auto &dominated = entry_block.dominated_variables;
13856 if (find(begin(dominated), end(dominated), var.self) == end(dominated))
13857 entry_block.dominated_variables.push_back(var.self);
13858 var.deferred_declaration = true;
13859 }
13860 }
13861 else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
13862 {
13863 // No need to declare this variable, it has a static expression.
13864 var.deferred_declaration = false;
13865 }
13866 else if (expression_is_lvalue(v))
13867 {
13868 add_local_variable_name(var.self);
13869
13870 // Loop variables should never be declared early, they are explicitly emitted in a loop.
13871 if (var.initializer && !var.loop_variable)
13872 statement(variable_decl_function_local(var), ";");
13873 else
13874 {
13875 // Don't declare variable until first use to declutter the GLSL output quite a lot.
13876 // If we don't touch the variable before first branch,
13877 // declare it then since we need variable declaration to be in top scope.
13878 var.deferred_declaration = true;
13879 }
13880 }
13881 else
13882 {
13883 // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
13884 // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
13885 // This means that when we OpStore to these variables, we just write in the expression ID directly.
13886 // This breaks any kind of branching, since the variable must be statically assigned.
13887 // Branching on samplers and images would be pretty much impossible to fake in GLSL.
13888 var.statically_assigned = true;
13889 }
13890
13891 var.loop_variable_enable = false;
13892
13893 // Loop variables are never declared outside their for-loop, so block any implicit declaration.
13894 if (var.loop_variable)
13895 var.deferred_declaration = false;
13896 }
13897
13898 // Enforce declaration order for regression testing purposes.
13899 for (auto &block_id : func.blocks)
13900 {
13901 auto &block = get<SPIRBlock>(block_id);
13902 sort(begin(block.dominated_variables), end(block.dominated_variables));
13903 }
13904
13905 for (auto &line : current_function->fixup_hooks_in)
13906 line();
13907
13908 emit_block_chain(entry_block);
13909
13910 end_scope();
13911 processing_entry_point = false;
13912 statement("");
13913
13914 // Make sure deferred declaration state for local variables is cleared when we are done with function.
13915 // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
13916 for (auto &v : func.local_variables)
13917 {
13918 auto &var = get<SPIRVariable>(v);
13919 var.deferred_declaration = false;
13920 }
13921 }
13922
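// The fixups below adjust gl_Position for targets whose clip-space conventions differ from
// the one the SPIR-V was generated for: fixup_clipspace remaps depth from a [0, w] range to
// GL's [-w, w] (i.e. [0, 1] to [-1, 1] after the perspective divide), and flip_vert_y mirrors
// the Y axis.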
13923 void CompilerGLSL::emit_fixup()
13924 {
13925 if (is_vertex_like_shader())
13926 {
13927 if (options.vertex.fixup_clipspace)
13928 {
13929 const char *suffix = backend.float_literal_suffix ? "f" : "";
13930 statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
13931 }
13932
13933 if (options.vertex.flip_vert_y)
13934 statement("gl_Position.y = -gl_Position.y;");
13935 }
13936 }
13937
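// Realizes SPIR-V OpPhi semantics for the edge from -> to by storing each phi's incoming value
// into its backing function variable. If a later phi on the same edge still needs to read a
// variable that an earlier phi just overwrote, the old value is first saved into a "_<id>_copy"
// temporary (declared at an outer scope via allocate_temporary_copy) and read from there.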
13938 void CompilerGLSL::flush_phi(BlockID from, BlockID to)
13939 {
13940 auto &child = get<SPIRBlock>(to);
13941 if (child.ignore_phi_from_block == from)
13942 return;
13943
13944 unordered_set<uint32_t> temporary_phi_variables;
13945
13946 for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
13947 {
13948 auto &phi = *itr;
13949
13950 if (phi.parent == from)
13951 {
13952 auto &var = get<SPIRVariable>(phi.function_variable);
13953
13954 // A Phi variable might be a loop variable, so flush to static expression.
13955 if (var.loop_variable && !var.loop_variable_enable)
13956 var.static_expression = phi.local_variable;
13957 else
13958 {
13959 flush_variable_declaration(phi.function_variable);
13960
13961 // Check if we are going to write to a Phi variable that another statement will read from
13962 // as part of another Phi node in our target block.
13963 // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
13964 // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
13965 bool need_saved_temporary =
13966 find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
13967 return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
13968 }) != end(child.phi_variables);
13969
13970 if (need_saved_temporary)
13971 {
13972 // Need to make sure we declare the phi variable with a copy at the right scope.
13973 // We cannot safely declare a temporary here since we might be inside a continue block.
13974 if (!var.allocate_temporary_copy)
13975 {
13976 var.allocate_temporary_copy = true;
13977 force_recompile();
13978 }
13979 statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
13980 temporary_phi_variables.insert(phi.function_variable);
13981 }
13982
13983 // This might be called in a continue block, so make sure we
13984 // use this to emit ESSL 1.0 compliant increments/decrements.
13985 auto lhs = to_expression(phi.function_variable);
13986
13987 string rhs;
13988 if (temporary_phi_variables.count(phi.local_variable))
13989 rhs = join("_", phi.local_variable, "_copy");
13990 else
13991 rhs = to_pointer_expression(phi.local_variable);
13992
13993 if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
13994 statement(lhs, " = ", rhs, ";");
13995 }
13996
13997 register_write(phi.function_variable);
13998 }
13999 }
14000 }
14001
14002 void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
14003 {
14004 auto &to_block = get<SPIRBlock>(to);
14005 if (from == to)
14006 return;
14007
14008 assert(is_continue(to));
14009 if (to_block.complex_continue)
14010 {
14011 // Just emit the whole block chain as is.
14012 auto usage_counts = expression_usage_counts;
14013
14014 emit_block_chain(to_block);
14015
14016 // Expression usage counts are moot after returning from the continue block.
14017 expression_usage_counts = usage_counts;
14018 }
14019 else
14020 {
14021 auto &from_block = get<SPIRBlock>(from);
14022 bool outside_control_flow = false;
14023 uint32_t loop_dominator = 0;
14024
14025 // FIXME: Refactor this to not use the old loop_dominator tracking.
14026 if (from_block.merge_block)
14027 {
14028 // If we are a loop header, we don't set the loop dominator,
14029 // so just use "self" here.
14030 loop_dominator = from;
14031 }
14032 else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
14033 {
14034 loop_dominator = from_block.loop_dominator;
14035 }
14036
14037 if (loop_dominator != 0)
14038 {
14039 auto &cfg = get_cfg_for_current_function();
14040
14041 // For non-complex continue blocks, we implicitly branch to the continue block
14042 // by having the continue block be part of the loop header in for (; ; continue-block).
14043 outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
14044 }
14045
14046 // Some simplification for for-loops. We always end up with a useless continue;
14047 // statement since we branch to a loop block.
14048 // Walk the CFG: if we unconditionally execute the block calling continue, assuming we're in the loop body,
14049 // we can avoid writing out an explicit continue statement.
14050 // Similar optimization to return statements if we know we're outside flow control.
14051 if (!outside_control_flow)
14052 statement("continue;");
14053 }
14054 }
14055
14056 void CompilerGLSL::branch(BlockID from, BlockID to)
14057 {
14058 flush_phi(from, to);
14059 flush_control_dependent_expressions(from);
14060
14061 bool to_is_continue = is_continue(to);
14062
14063 // This is only a continue if we branch to our loop dominator.
14064 if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
14065 {
14066 // This can happen if we had a complex continue block which was emitted.
14067 // Once the continue block tries to branch to the loop header, just emit continue;
14068 // and end the chain here.
14069 statement("continue;");
14070 }
14071 else if (from != to && is_break(to))
14072 {
14073 // We cannot break to ourselves, so check explicitly for from != to.
14074 // This case can trigger if a loop header is all three of these things:
14075 // - Continue block
14076 // - Loop header
14077 // - Break merge target all at once ...
14078
14079 // Very dirty workaround.
14080 // Switch constructs are able to break, but they cannot break out of a loop at the same time.
14081 // The only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
14082 // write to the ladder here, and defer the break.
14083 // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
14084 if (current_emitting_switch && is_loop_break(to) &&
14085 current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
14086 get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
14087 {
14088 if (!current_emitting_switch->need_ladder_break)
14089 {
14090 force_recompile();
14091 current_emitting_switch->need_ladder_break = true;
14092 }
14093
14094 statement("_", current_emitting_switch->self, "_ladder_break = true;");
14095 }
14096 statement("break;");
14097 }
14098 else if (to_is_continue || from == to)
14099 {
14100 // The from == to case can happen for a do-while loop which branches into itself.
14101 // We don't mark these cases as continue blocks, but the only possible way to branch into
14102 // ourselves is by means of a continue block.
14103
14104 // If we are merging to a continue block, there is no need to emit the block chain for continue here.
14105 // We can branch to the continue block after we merge execution.
14106
14107 // Here we make use of structured control flow rules from spec:
14108 // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
14109 // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
14110 // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
14111 auto &block_meta = ir.block_meta[to];
14112 bool branching_to_merge =
14113 (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
14114 ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
14115 if (!to_is_continue || !branching_to_merge)
14116 branch_to_continue(from, to);
14117 }
14118 else if (!is_conditional(to))
14119 emit_block_chain(get<SPIRBlock>(to));
14120
14121 // It is important that we check for break before continue.
14122 // A block might serve two purposes, a break block for the inner scope, and
14123 // a continue block in the outer scope.
14124 // Inner scope always takes precedence.
14125 }
14126
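// Emits a two-way conditional branch as if/else. A path that merely falls through to the
// selection merge block with no phi moves to flush is omitted entirely, and when only the
// false path needs code it is emitted with a negated condition, e.g. "if (!cond) { ... }".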
14127 void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
14128 {
14129 auto &from_block = get<SPIRBlock>(from);
14130 BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
14131
14132 // If we branch directly to our selection merge target, we don't need a code path.
14133 bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
14134 bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);
14135
14136 if (!true_block_needs_code && !false_block_needs_code)
14137 return;
14138
14139 // We might have a loop merge here. Only consider selection flattening constructs.
14140 // Loop hints are handled explicitly elsewhere.
14141 if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
14142 emit_block_hints(from_block);
14143
14144 if (true_block_needs_code)
14145 {
14146 statement("if (", to_expression(cond), ")");
14147 begin_scope();
14148 branch(from, true_block);
14149 end_scope();
14150
14151 if (false_block_needs_code)
14152 {
14153 statement("else");
14154 begin_scope();
14155 branch(from, false_block);
14156 end_scope();
14157 }
14158 }
14159 else if (false_block_needs_code)
14160 {
14161 // Only need false path, use negative conditional.
14162 statement("if (!", to_enclosed_expression(cond), ")");
14163 begin_scope();
14164 branch(from, false_block);
14165 end_scope();
14166 }
14167 }
14168
14169 // FIXME: This currently cannot handle complex continue blocks
14170 // as in do-while.
14171 // This should be seen as a "trivial" continue block.
14172 string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
14173 {
14174 auto *block = &get<SPIRBlock>(continue_block);
14175
14176 // While emitting the continue block, declare_temporary will check this
14177 // if we have to emit temporaries.
14178 current_continue_block = block;
14179
14180 SmallVector<string> statements;
14181
14182 // Capture all statements into our list.
14183 auto *old = redirect_statement;
14184 redirect_statement = &statements;
14185
14186 // Stamp out all blocks one after each other.
14187 while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
14188 {
14189 // Write out all instructions we have in this block.
14190 emit_block_instructions(*block);
14191
14192 // For plain branchless for/while continue blocks.
14193 if (block->next_block)
14194 {
14195 flush_phi(continue_block, block->next_block);
14196 block = &get<SPIRBlock>(block->next_block);
14197 }
14198 // For do while blocks. The last block will be a select block.
14199 else if (block->true_block && follow_true_block)
14200 {
14201 flush_phi(continue_block, block->true_block);
14202 block = &get<SPIRBlock>(block->true_block);
14203 }
14204 else if (block->false_block && follow_false_block)
14205 {
14206 flush_phi(continue_block, block->false_block);
14207 block = &get<SPIRBlock>(block->false_block);
14208 }
14209 else
14210 {
14211 SPIRV_CROSS_THROW("Invalid continue block detected!");
14212 }
14213 }
14214
14215 // Restore old pointer.
14216 redirect_statement = old;
14217
14218 // Somewhat ugly, strip off the last ';' since we use ',' instead.
14219 // Ideally, we should select this behavior in statement().
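// E.g. a continue block containing "i++;" and "j += 2;" ends up in the for header roughly as:
//   for (...; ...; i++, j += 2)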
14220 for (auto &s : statements)
14221 {
14222 if (!s.empty() && s.back() == ';')
14223 s.erase(s.size() - 1, 1);
14224 }
14225
14226 current_continue_block = nullptr;
14227 return merge(statements);
14228 }
14229
14230 void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
14231 {
14232 // While loops do not take initializers, so declare all of them outside.
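// E.g. a declaration like "int i = 0;" (or just "int i;") is emitted before the "while (...)" header
// rather than inside it.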
14233 for (auto &loop_var : block.loop_variables)
14234 {
14235 auto &var = get<SPIRVariable>(loop_var);
14236 statement(variable_decl(var), ";");
14237 }
14238 }
14239
14240 string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
14241 {
14242 if (block.loop_variables.empty())
14243 return "";
14244
14245 bool same_types = for_loop_initializers_are_same_type(block);
14246 // We can only declare for loop initializers if all variables are of same type.
14247 // If we cannot do this, declare individual variables before the loop header.
14248
14249 // We might have a loop variable candidate which was not assigned to for some reason.
14250 uint32_t missing_initializers = 0;
14251 for (auto &variable : block.loop_variables)
14252 {
14253 uint32_t expr = get<SPIRVariable>(variable).static_expression;
14254
14255 // Sometimes loop variables are initialized with OpUndef, but we can just declare
14256 // a plain variable without initializer in this case.
14257 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
14258 missing_initializers++;
14259 }
14260
14261 if (block.loop_variables.size() == 1 && missing_initializers == 0)
14262 {
14263 return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
14264 }
14265 else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
14266 {
14267 for (auto &loop_var : block.loop_variables)
14268 statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
14269 return "";
14270 }
14271 else
14272 {
14273 // We have a mix of loop variables, either ones with a clear initializer, or ones without.
14274 // Separate the two streams.
14275 string expr;
14276
14277 for (auto &loop_var : block.loop_variables)
14278 {
14279 uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
14280 if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
14281 {
14282 statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
14283 }
14284 else
14285 {
14286 auto &var = get<SPIRVariable>(loop_var);
14287 auto &type = get_variable_data_type(var);
14288 if (expr.empty())
14289 {
14290 // For loop initializers are of the form <type id = value, id = value, id = value, etc ...
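// E.g. this builds something like "int a = 0, b = 10" for the for header
// (with "* " inserted before later identifiers when the type is a pointer, as handled below).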
14291 expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
14292 }
14293 else
14294 {
14295 expr += ", ";
14296 // In MSL, being based on C++, the asterisk marking a pointer
14297 // binds to the identifier, not the type.
14298 if (type.pointer)
14299 expr += "* ";
14300 }
14301
14302 expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
14303 }
14304 }
14305 return expr;
14306 }
14307 }
14308
14309 bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
14310 {
14311 if (block.loop_variables.size() <= 1)
14312 return true;
14313
14314 uint32_t expected = 0;
14315 Bitset expected_flags;
14316 for (auto &var : block.loop_variables)
14317 {
14318 // Don't care about uninitialized variables as they will not be part of the initializers.
14319 uint32_t expr = get<SPIRVariable>(var).static_expression;
14320 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
14321 continue;
14322
14323 if (expected == 0)
14324 {
14325 expected = get<SPIRVariable>(var).basetype;
14326 expected_flags = get_decoration_bitset(var);
14327 }
14328 else if (expected != get<SPIRVariable>(var).basetype)
14329 return false;
14330
14331 // Precision flags and things like that must also match.
14332 if (expected_flags != get_decoration_bitset(var))
14333 return false;
14334 }
14335
14336 return true;
14337 }
14338
14339 bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
14340 {
14341 SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
14342
14343 if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
14344 {
14345 uint32_t current_count = statement_count;
14346 // If we're trying to create a true for loop,
14347 // we need to make sure that all opcodes before branch statement do not actually emit any code.
14348 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
14349 emit_block_instructions(block);
14350
14351 bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
14352
14353 // This can work! We only did trivial things which could be forwarded in block body!
14354 if (current_count == statement_count && condition_is_temporary)
14355 {
14356 switch (continue_type)
14357 {
14358 case SPIRBlock::ForLoop:
14359 {
14360 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
14361 flush_undeclared_variables(block);
14362
14363 // Important that we do this in this order because
14364 // emitting the continue block can invalidate the condition expression.
14365 auto initializer = emit_for_loop_initializers(block);
14366 auto condition = to_expression(block.condition);
14367
14368 // Condition might have to be inverted.
14369 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14370 condition = join("!", enclose_expression(condition));
14371
14372 emit_block_hints(block);
14373 if (method != SPIRBlock::MergeToSelectContinueForLoop)
14374 {
14375 auto continue_block = emit_continue_block(block.continue_block, false, false);
14376 statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
14377 }
14378 else
14379 statement("for (", initializer, "; ", condition, "; )");
14380 break;
14381 }
14382
14383 case SPIRBlock::WhileLoop:
14384 {
14385 // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
14386 flush_undeclared_variables(block);
14387 emit_while_loop_initializers(block);
14388 emit_block_hints(block);
14389
14390 auto condition = to_expression(block.condition);
14391 // Condition might have to be inverted.
14392 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14393 condition = join("!", enclose_expression(condition));
14394
14395 statement("while (", condition, ")");
14396 break;
14397 }
14398
14399 default:
14400 block.disable_block_optimization = true;
14401 force_recompile();
14402 begin_scope(); // We'll see an end_scope() later.
14403 return false;
14404 }
14405
14406 begin_scope();
14407 return true;
14408 }
14409 else
14410 {
14411 block.disable_block_optimization = true;
14412 force_recompile();
14413 begin_scope(); // We'll see an end_scope() later.
14414 return false;
14415 }
14416 }
14417 else if (method == SPIRBlock::MergeToDirectForLoop)
14418 {
14419 auto &child = get<SPIRBlock>(block.next_block);
14420
14421 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
14422 flush_undeclared_variables(child);
14423
14424 uint32_t current_count = statement_count;
14425
14426 // If we're trying to create a true for loop,
14427 // we need to make sure that all opcodes before branch statement do not actually emit any code.
14428 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
14429 emit_block_instructions(child);
14430
14431 bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
14432
14433 if (current_count == statement_count && condition_is_temporary)
14434 {
14435 uint32_t target_block = child.true_block;
14436
14437 switch (continue_type)
14438 {
14439 case SPIRBlock::ForLoop:
14440 {
14441 // Important that we do this in this order because
14442 // emitting the continue block can invalidate the condition expression.
14443 auto initializer = emit_for_loop_initializers(block);
14444 auto condition = to_expression(child.condition);
14445
14446 // Condition might have to be inverted.
14447 if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
14448 {
14449 condition = join("!", enclose_expression(condition));
14450 target_block = child.false_block;
14451 }
14452
14453 auto continue_block = emit_continue_block(block.continue_block, false, false);
14454 emit_block_hints(block);
14455 statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
14456 break;
14457 }
14458
14459 case SPIRBlock::WhileLoop:
14460 {
14461 emit_while_loop_initializers(block);
14462 emit_block_hints(block);
14463
14464 auto condition = to_expression(child.condition);
14465 // Condition might have to be inverted.
14466 if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
14467 {
14468 condition = join("!", enclose_expression(condition));
14469 target_block = child.false_block;
14470 }
14471
14472 statement("while (", condition, ")");
14473 break;
14474 }
14475
14476 default:
14477 block.disable_block_optimization = true;
14478 force_recompile();
14479 begin_scope(); // We'll see an end_scope() later.
14480 return false;
14481 }
14482
14483 begin_scope();
14484 branch(child.self, target_block);
14485 return true;
14486 }
14487 else
14488 {
14489 block.disable_block_optimization = true;
14490 force_recompile();
14491 begin_scope(); // We'll see an end_scope() later.
14492 return false;
14493 }
14494 }
14495 else
14496 return false;
14497 }
14498
14499 void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
14500 {
14501 for (auto &v : block.dominated_variables)
14502 flush_variable_declaration(v);
14503 }
14504
14505 void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
14506 {
14507 // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
14508 // Need to sort these to ensure that reference output is stable.
14509 sort(begin(temporaries), end(temporaries),
14510 [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
14511
14512 for (auto &tmp : temporaries)
14513 {
14514 add_local_variable_name(tmp.second);
14515 auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
14516 auto &type = get<SPIRType>(tmp.first);
14517
14518 // Not all targets support pointer literals, so don't bother with that case.
14519 string initializer;
14520 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
14521 initializer = join(" = ", to_zero_initialized_expression(tmp.first));
14522
14523 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");
14524
14525 hoisted_temporaries.insert(tmp.second);
14526 forced_temporaries.insert(tmp.second);
14527
14528 // The temporary might be read from before it's assigned, set up the expression now.
14529 set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
14530 }
14531 }
14532
14533 void CompilerGLSL::emit_block_chain(SPIRBlock &block)
14534 {
14535 bool select_branch_to_true_block = false;
14536 bool select_branch_to_false_block = false;
14537 bool skip_direct_branch = false;
14538 bool emitted_loop_header_variables = false;
14539 bool force_complex_continue_block = false;
14540 ValueSaver<uint32_t> loop_level_saver(current_loop_level);
14541
14542 if (block.merge == SPIRBlock::MergeLoop)
14543 add_loop_level();
14544
14545 emit_hoisted_temporaries(block.declare_temporary);
14546
14547 SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
14548 if (block.continue_block)
14549 {
14550 continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
14551 // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
14552 if (continue_type == SPIRBlock::ComplexLoop)
14553 block.complex_continue = true;
14554 }
14555
14556 // If we have loop variables, stop masking out access to these variables now.
14557 for (auto var_id : block.loop_variables)
14558 {
14559 auto &var = get<SPIRVariable>(var_id);
14560 var.loop_variable_enable = true;
14561 // We're not going to declare the variable directly, so emit a copy here.
14562 emit_variable_temporary_copies(var);
14563 }
14564
14565 // Remember deferred declaration state. We will restore it before returning.
14566 SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
14567 for (size_t i = 0; i < block.dominated_variables.size(); i++)
14568 {
14569 uint32_t var_id = block.dominated_variables[i];
14570 auto &var = get<SPIRVariable>(var_id);
14571 rearm_dominated_variables[i] = var.deferred_declaration;
14572 }
14573
14574 // This is the method often used by spirv-opt to implement loops.
14575 // The loop header goes straight into the continue block.
14576 // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
14577 // it *MUST* be used in the continue block. This loop method will not work.
14578 if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
14579 {
14580 flush_undeclared_variables(block);
14581 if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
14582 {
14583 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14584 select_branch_to_false_block = true;
14585 else
14586 select_branch_to_true_block = true;
14587
14588 emitted_loop_header_variables = true;
14589 force_complex_continue_block = true;
14590 }
14591 }
14592 // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
14593 else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
14594 {
14595 flush_undeclared_variables(block);
14596 if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
14597 {
14598 // The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
14599 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14600 select_branch_to_false_block = true;
14601 else
14602 select_branch_to_true_block = true;
14603
14604 emitted_loop_header_variables = true;
14605 }
14606 }
14607 // This is the newer loop behavior in glslang which branches from the loop header directly to
14608 // a new block, which in turn has an OpBranchConditional without a selection merge.
14609 else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
14610 {
14611 flush_undeclared_variables(block);
14612 if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
14613 {
14614 skip_direct_branch = true;
14615 emitted_loop_header_variables = true;
14616 }
14617 }
14618 else if (continue_type == SPIRBlock::DoWhileLoop)
14619 {
14620 flush_undeclared_variables(block);
14621 emit_while_loop_initializers(block);
14622 emitted_loop_header_variables = true;
14623 // We have some temporaries where the loop header is the dominator.
14624 // We risk a case where we have code like:
14625 // for (;;) { create-temporary; break; } consume-temporary;
14626 // so force-declare temporaries here.
14627 emit_hoisted_temporaries(block.potential_declare_temporary);
14628 statement("do");
14629 begin_scope();
14630
14631 emit_block_instructions(block);
14632 }
14633 else if (block.merge == SPIRBlock::MergeLoop)
14634 {
14635 flush_undeclared_variables(block);
14636 emit_while_loop_initializers(block);
14637 emitted_loop_header_variables = true;
14638
14639 // We have a generic loop without any distinguishable pattern like for, while or do while.
14640 get<SPIRBlock>(block.continue_block).complex_continue = true;
14641 continue_type = SPIRBlock::ComplexLoop;
14642
14643 // We have some temporaries where the loop header is the dominator.
14644 // We risk a case where we have code like:
14645 // for (;;) { create-temporary; break; } consume-temporary;
14646 // so force-declare temporaries here.
14647 emit_hoisted_temporaries(block.potential_declare_temporary);
14648 emit_block_hints(block);
14649 statement("for (;;)");
14650 begin_scope();
14651
14652 emit_block_instructions(block);
14653 }
14654 else
14655 {
14656 emit_block_instructions(block);
14657 }
14658
14659 // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
14660 // as writes to said loop variables might have been masked out, we need a recompile.
14661 if (!emitted_loop_header_variables && !block.loop_variables.empty())
14662 {
14663 force_recompile();
14664 for (auto var : block.loop_variables)
14665 get<SPIRVariable>(var).loop_variable = false;
14666 block.loop_variables.clear();
14667 }
14668
14669 flush_undeclared_variables(block);
14670 bool emit_next_block = true;
14671
14672 // Handle end of block.
14673 switch (block.terminator)
14674 {
14675 case SPIRBlock::Direct:
14676 // True when emitting complex continue block.
14677 if (block.loop_dominator == block.next_block)
14678 {
14679 branch(block.self, block.next_block);
14680 emit_next_block = false;
14681 }
14682 // True if MergeToDirectForLoop succeeded.
14683 else if (skip_direct_branch)
14684 emit_next_block = false;
14685 else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
14686 {
14687 branch(block.self, block.next_block);
14688 emit_next_block = false;
14689 }
14690 break;
14691
14692 case SPIRBlock::Select:
14693 // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
14694 if (select_branch_to_true_block)
14695 {
14696 if (force_complex_continue_block)
14697 {
14698 assert(block.true_block == block.continue_block);
14699
14700 // We're going to emit a continue block directly here, so make sure it's marked as complex.
14701 auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
14702 bool old_complex = complex_continue;
14703 complex_continue = true;
14704 branch(block.self, block.true_block);
14705 complex_continue = old_complex;
14706 }
14707 else
14708 branch(block.self, block.true_block);
14709 }
14710 else if (select_branch_to_false_block)
14711 {
14712 if (force_complex_continue_block)
14713 {
14714 assert(block.false_block == block.continue_block);
14715
14716 // We're going to emit a continue block directly here, so make sure it's marked as complex.
14717 auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
14718 bool old_complex = complex_continue;
14719 complex_continue = true;
14720 branch(block.self, block.false_block);
14721 complex_continue = old_complex;
14722 }
14723 else
14724 branch(block.self, block.false_block);
14725 }
14726 else
14727 branch(block.self, block.condition, block.true_block, block.false_block);
14728 break;
14729
14730 case SPIRBlock::MultiSelect:
14731 {
14732 auto &type = expression_type(block.condition);
14733 bool unsigned_case =
14734 type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte;
14735
14736 if (block.merge == SPIRBlock::MergeNone)
14737 SPIRV_CROSS_THROW("Switch statement is not structured");
14738
14739 if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
14740 {
14741 // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
14742 SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
14743 }
14744
14745 const char *label_suffix = "";
14746 if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
14747 label_suffix = "u";
14748 else if (type.basetype == SPIRType::UShort)
14749 label_suffix = backend.uint16_t_literal_suffix;
14750 else if (type.basetype == SPIRType::Short)
14751 label_suffix = backend.int16_t_literal_suffix;
14752
14753 SPIRBlock *old_emitting_switch = current_emitting_switch;
14754 current_emitting_switch = &block;
14755
14756 if (block.need_ladder_break)
14757 statement("bool _", block.self, "_ladder_break = false;");
14758
14759 // Find all unique case constructs.
14760 unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs;
14761 SmallVector<uint32_t> block_declaration_order;
14762 SmallVector<uint32_t> literals_to_merge;
14763
14764 // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
14765 // and let the default: block handle it.
14766 // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
14767 // We only need to consider possible fallthrough if order[i] branches to order[i + 1].
14768 for (auto &c : block.cases)
14769 {
14770 if (c.block != block.next_block && c.block != block.default_block)
14771 {
14772 if (!case_constructs.count(c.block))
14773 block_declaration_order.push_back(c.block);
14774 case_constructs[c.block].push_back(c.value);
14775 }
14776 else if (c.block == block.next_block && block.default_block != block.next_block)
14777 {
14778 // We might have to flush phi inside specific case labels.
14779 // If we can piggyback on default:, do so instead.
14780 literals_to_merge.push_back(c.value);
14781 }
14782 }
14783
14784 // Empty literal array -> default.
14785 if (block.default_block != block.next_block)
14786 {
14787 auto &default_block = get<SPIRBlock>(block.default_block);
14788
14789 // We need to slide in the default block somewhere in this chain
14790 // if there are fall-through scenarios since the default is declared separately in OpSwitch.
14791 // Only consider trivial fall-through cases here.
14792 size_t num_blocks = block_declaration_order.size();
14793 bool injected_block = false;
14794
14795 for (size_t i = 0; i < num_blocks; i++)
14796 {
14797 auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
14798 if (execution_is_direct_branch(case_block, default_block))
14799 {
14800 // Fallthrough to default block, we must inject the default block here.
14801 block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
14802 injected_block = true;
14803 break;
14804 }
14805 else if (execution_is_direct_branch(default_block, case_block))
14806 {
14807 // Default case is falling through to another case label, we must inject the default block here.
14808 block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
14809 injected_block = true;
14810 break;
14811 }
14812 }
14813
14814 // Order does not matter.
14815 if (!injected_block)
14816 block_declaration_order.push_back(block.default_block);
14817 else if (is_legacy_es())
14818 SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
14819
14820 case_constructs[block.default_block] = {};
14821 }
14822
14823 size_t num_blocks = block_declaration_order.size();
14824
14825 const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string {
14826 return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal));
14827 };
14828
14829 const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint32_t> &labels,
14830 const char *suffix) -> string {
14831 string ret;
14832 size_t count = labels.size();
14833 for (size_t i = 0; i < count; i++)
14834 {
14835 if (i)
14836 ret += " || ";
14837 ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
14838 count > 1 ? ")" : "");
14839 }
14840 return ret;
14841 };
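// E.g. for labels {1, 2} with an empty suffix, the lambda above yields roughly "(sel == 1) || (sel == 2)",
// where "sel" stands for the enclosed switch condition expression.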
14842
14843 // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
14844 // we need to flush phi nodes outside the switch block in a branch,
14845 // and skip any Phi handling inside the case label to make fall-through work as expected.
14846 // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
14847 // inside the case label if at all possible.
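// Rough sketch of what this emits before the switch (selector and values are placeholders):
//   if (sel == 1 || sel == 2)
//   {
//       phi_var = ...;
//   }
// after which Phi handling inside that case label is suppressed via ignore_phi_from_block.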
14848 for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
14849 {
14850 if (flush_phi_required(block.self, block_declaration_order[i]) &&
14851 flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
14852 {
14853 uint32_t target_block = block_declaration_order[i];
14854
14855 // Make sure we flush Phi, it might have been marked to be ignored earlier.
14856 get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
14857
14858 auto &literals = case_constructs[target_block];
14859
14860 if (literals.empty())
14861 {
14862 // Oh boy, gotta make a complete negative test instead! o.o
14863 // Find all possible literals that would *not* make us enter the default block.
14864 // If none of those literals match, we flush Phi ...
14865 SmallVector<string> conditions;
14866 for (size_t j = 0; j < num_blocks; j++)
14867 {
14868 auto &negative_literals = case_constructs[block_declaration_order[j]];
14869 for (auto &case_label : negative_literals)
14870 conditions.push_back(join(to_enclosed_expression(block.condition),
14871 " != ", to_case_label(case_label, unsigned_case)));
14872 }
14873
14874 statement("if (", merge(conditions, " && "), ")");
14875 begin_scope();
14876 flush_phi(block.self, target_block);
14877 end_scope();
14878 }
14879 else
14880 {
14881 SmallVector<string> conditions;
14882 conditions.reserve(literals.size());
14883 for (auto &case_label : literals)
14884 conditions.push_back(join(to_enclosed_expression(block.condition),
14885 " == ", to_case_label(case_label, unsigned_case)));
14886 statement("if (", merge(conditions, " || "), ")");
14887 begin_scope();
14888 flush_phi(block.self, target_block);
14889 end_scope();
14890 }
14891
14892 // Mark the block so that we don't flush Phi from header to case label.
14893 get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
14894 }
14895 }
14896
14897 // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
14898 // non-structured exits with the help of a switch block.
14899 // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
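// Sketch of the replacement pattern (ESSL 1.0 instead gets a single-iteration for loop, see below):
//   do
//   {
//       ... default block body ...
//   } while(false);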
14900 bool degenerate_switch = block.default_block != block.merge_block && block.cases.empty();
14901
14902 if (degenerate_switch || is_legacy_es())
14903 {
14904 // ESSL 1.0 is not guaranteed to support do/while.
14905 if (is_legacy_es())
14906 {
14907 uint32_t counter = statement_count;
14908 statement("for (int spvDummy", counter, " = 0; spvDummy", counter,
14909 " < 1; spvDummy", counter, "++)");
14910 }
14911 else
14912 statement("do");
14913 }
14914 else
14915 {
14916 emit_block_hints(block);
14917 statement("switch (", to_expression(block.condition), ")");
14918 }
14919 begin_scope();
14920
14921 for (size_t i = 0; i < num_blocks; i++)
14922 {
14923 uint32_t target_block = block_declaration_order[i];
14924 auto &literals = case_constructs[target_block];
14925
14926 if (literals.empty())
14927 {
14928 // Default case.
14929 if (!degenerate_switch)
14930 {
14931 if (is_legacy_es())
14932 statement("else");
14933 else
14934 statement("default:");
14935 }
14936 }
14937 else
14938 {
14939 if (is_legacy_es())
14940 {
14941 statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
14942 ")");
14943 }
14944 else
14945 {
14946 for (auto &case_literal : literals)
14947 {
14948 // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
14949 statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
14950 }
14951 }
14952 }
14953
14954 auto &case_block = get<SPIRBlock>(target_block);
14955 if (backend.support_case_fallthrough && i + 1 < num_blocks &&
14956 execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
14957 {
14958 // We will fall through here, so just terminate the block chain early.
14959 // We still need to deal with Phi potentially.
14960 // No need for a stack-like thing here since we only do fall-through when there is a
14961 // single trivial branch to the fall-through target.
14962 current_emitting_switch_fallthrough = true;
14963 }
14964 else
14965 current_emitting_switch_fallthrough = false;
14966
14967 if (!degenerate_switch)
14968 begin_scope();
14969 branch(block.self, target_block);
14970 if (!degenerate_switch)
14971 end_scope();
14972
14973 current_emitting_switch_fallthrough = false;
14974 }
14975
14976 // Might still have to flush phi variables if we branch from loop header directly to merge target.
14977 if (flush_phi_required(block.self, block.next_block))
14978 {
14979 if (block.default_block == block.next_block || !literals_to_merge.empty())
14980 {
14981 for (auto &case_literal : literals_to_merge)
14982 statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
14983
14984 if (block.default_block == block.next_block)
14985 {
14986 if (is_legacy_es())
14987 statement("else");
14988 else
14989 statement("default:");
14990 }
14991
14992 begin_scope();
14993 flush_phi(block.self, block.next_block);
14994 statement("break;");
14995 end_scope();
14996 }
14997 }
14998
14999 if (degenerate_switch && !is_legacy_es())
15000 end_scope_decl("while(false)");
15001 else
15002 end_scope();
15003
15004 if (block.need_ladder_break)
15005 {
15006 statement("if (_", block.self, "_ladder_break)");
15007 begin_scope();
15008 statement("break;");
15009 end_scope();
15010 }
15011
15012 current_emitting_switch = old_emitting_switch;
15013 break;
15014 }
15015
15016 case SPIRBlock::Return:
15017 {
15018 for (auto &line : current_function->fixup_hooks_out)
15019 line();
15020
15021 if (processing_entry_point)
15022 emit_fixup();
15023
15024 auto &cfg = get_cfg_for_current_function();
15025
15026 if (block.return_value)
15027 {
15028 auto &type = expression_type(block.return_value);
15029 if (!type.array.empty() && !backend.can_return_array)
15030 {
15031 // If we cannot return arrays, we will have a special out argument we can write to instead.
15032 // The backend is responsible for setting this up, and for redirecting the return values as appropriate.
15033 if (ir.ids[block.return_value].get_type() != TypeUndef)
15034 {
15035 emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
15036 get_expression_effective_storage_class(block.return_value));
15037 }
15038
15039 if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
15040 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
15041 {
15042 statement("return;");
15043 }
15044 }
15045 else
15046 {
15047 // OpReturnValue can return Undef, so don't emit anything for this case.
15048 if (ir.ids[block.return_value].get_type() != TypeUndef)
15049 statement("return ", to_expression(block.return_value), ";");
15050 }
15051 }
15052 else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
15053 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
15054 {
15055 // If this block is the very final block and not called from control flow,
15056 // we do not need an explicit return which looks out of place. Just end the function here.
15057 // In the very weird case of for(;;) { return; } executing return is unconditional,
15058 // but we actually need a return here ...
15059 statement("return;");
15060 }
15061 break;
15062 }
15063
15064 case SPIRBlock::Kill:
15065 statement(backend.discard_literal, ";");
15066 break;
15067
15068 case SPIRBlock::Unreachable:
15069 emit_next_block = false;
15070 break;
15071
15072 case SPIRBlock::IgnoreIntersection:
15073 statement("ignoreIntersectionEXT;");
15074 break;
15075
15076 case SPIRBlock::TerminateRay:
15077 statement("terminateRayEXT;");
15078 break;
15079
15080 default:
15081 SPIRV_CROSS_THROW("Unimplemented block terminator.");
15082 }
15083
15084 if (block.next_block && emit_next_block)
15085 {
15086 // If we hit this case, we're dealing with an unconditional branch, which means we will output
15087 // that block after this. If we had selection merge, we already flushed phi variables.
15088 if (block.merge != SPIRBlock::MergeSelection)
15089 {
15090 flush_phi(block.self, block.next_block);
15091 // For a direct branch, need to remember to invalidate expressions in the next linear block instead.
15092 get<SPIRBlock>(block.next_block).invalidate_expressions = block.invalidate_expressions;
15093 }
15094
15095 // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
15096 if (!current_emitting_switch_fallthrough)
15097 {
15098 // For merge selects we might have ignored the fact that a merge target
15099 // could have been a break; or continue;
15100 // We will need to deal with it here.
15101 if (is_loop_break(block.next_block))
15102 {
15103 // Cannot check for just break, because switch statements will also use break.
15104 assert(block.merge == SPIRBlock::MergeSelection);
15105 statement("break;");
15106 }
15107 else if (is_continue(block.next_block))
15108 {
15109 assert(block.merge == SPIRBlock::MergeSelection);
15110 branch_to_continue(block.self, block.next_block);
15111 }
15112 else if (BlockID(block.self) != block.next_block)
15113 emit_block_chain(get<SPIRBlock>(block.next_block));
15114 }
15115 }
15116
15117 if (block.merge == SPIRBlock::MergeLoop)
15118 {
15119 if (continue_type == SPIRBlock::DoWhileLoop)
15120 {
15121 // Make sure that we run the continue block to get the expressions set, but this
15122 // should become an empty string.
15123 // We have no fallbacks if we cannot forward everything to temporaries ...
15124 const auto &continue_block = get<SPIRBlock>(block.continue_block);
15125 bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
15126 get<SPIRBlock>(continue_block.loop_dominator));
15127
15128 uint32_t current_count = statement_count;
15129 auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
15130 if (statement_count != current_count)
15131 {
15132 // The DoWhile block has side effects, force ComplexLoop pattern next pass.
15133 get<SPIRBlock>(block.continue_block).complex_continue = true;
15134 force_recompile();
15135 }
15136
15137 // Might have to invert the do-while test here.
15138 auto condition = to_expression(continue_block.condition);
15139 if (!positive_test)
15140 condition = join("!", enclose_expression(condition));
15141
15142 end_scope_decl(join("while (", condition, ")"));
15143 }
15144 else
15145 end_scope();
15146
15147 loop_level_saver.release();
15148
15149 // We cannot break out of two loops at once, so don't check for break; here.
15150 // Using block.self as the "from" block isn't quite right, but it has the same scope
15151 // and dominance structure, so it's fine.
15152 if (is_continue(block.merge_block))
15153 branch_to_continue(block.self, block.merge_block);
15154 else
15155 emit_block_chain(get<SPIRBlock>(block.merge_block));
15156 }
15157
15158 // Forget about control dependent expressions now.
15159 block.invalidate_expressions.clear();
15160
15161 // After we return, we must be out of scope, so if we somehow have to re-emit this function,
15162 // re-declare variables if necessary.
15163 assert(rearm_dominated_variables.size() == block.dominated_variables.size());
15164 for (size_t i = 0; i < block.dominated_variables.size(); i++)
15165 {
15166 uint32_t var = block.dominated_variables[i];
15167 get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
15168 }
15169
15170 // Just like for deferred declaration, we need to forget about loop variable enable
15171 // if our block chain is reinstantiated later.
15172 for (auto &var_id : block.loop_variables)
15173 get<SPIRVariable>(var_id).loop_variable_enable = false;
15174 }
15175
15176 void CompilerGLSL::begin_scope()
15177 {
15178 statement("{");
15179 indent++;
15180 }
15181
15182 void CompilerGLSL::end_scope()
15183 {
15184 if (!indent)
15185 SPIRV_CROSS_THROW("Popping empty indent stack.");
15186 indent--;
15187 statement("}");
15188 }
15189
15190 void CompilerGLSL::end_scope(const string &trailer)
15191 {
15192 if (!indent)
15193 SPIRV_CROSS_THROW("Popping empty indent stack.");
15194 indent--;
15195 statement("}", trailer);
15196 }
15197
15198 void CompilerGLSL::end_scope_decl()
15199 {
15200 if (!indent)
15201 SPIRV_CROSS_THROW("Popping empty indent stack.");
15202 indent--;
15203 statement("};");
15204 }
15205
15206 void CompilerGLSL::end_scope_decl(const string &decl)
15207 {
15208 if (!indent)
15209 SPIRV_CROSS_THROW("Popping empty indent stack.");
15210 indent--;
15211 statement("} ", decl, ";");
15212 }
15213
15214 void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
15215 {
15216 // If our variable is remapped, and we rely on type-remapping information as
15217 // well, then we cannot pass the variable as a function parameter.
15218 // Fixing this is non-trivial without stamping out variants of the same function,
15219 // so for now warn about this and suggest workarounds instead.
15220 for (uint32_t i = 0; i < length; i++)
15221 {
15222 auto *var = maybe_get<SPIRVariable>(args[i]);
15223 if (!var || !var->remapped_variable)
15224 continue;
15225
15226 auto &type = get<SPIRType>(var->basetype);
15227 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
15228 {
15229 SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
15230 "This will not work correctly because type-remapping information is lost. "
15231 "To workaround, please consider not passing the subpass input as a function parameter, "
15232 "or use in/out variables instead which do not need type remapping information.");
15233 }
15234 }
15235 }
15236
15237 const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
15238 {
15239 // FIXME: This is kind of hacky. There should be a cleaner way.
15240 auto offset = uint32_t(&instr - current_emitting_block->ops.data());
15241 if ((offset + 1) < current_emitting_block->ops.size())
15242 return &current_emitting_block->ops[offset + 1];
15243 else
15244 return nullptr;
15245 }
15246
15247 uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
15248 {
15249 return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
15250 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
15251 MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
15252 }
15253
15254 void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass)
15255 {
15256 statement(lhs, " = ", to_expression(rhs_id), ";");
15257 }
15258
15259 bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
15260 {
15261 if (!backend.force_gl_in_out_block)
15262 return false;
15263 // This path is only relevant for GL backends.
15264
15265 auto *var = maybe_get<SPIRVariable>(target_id);
15266 if (!var || var->storage != StorageClassOutput)
15267 return false;
15268
15269 if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask)
15270 return false;
15271
15272 auto &type = expression_type(source_id);
15273 string array_expr;
15274 if (type.array_size_literal.back())
15275 {
15276 array_expr = convert_to_string(type.array.back());
15277 if (type.array.back() == 0)
15278 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
15279 }
15280 else
15281 array_expr = to_expression(type.array.back());
15282
15283 SPIRType target_type;
15284 target_type.basetype = SPIRType::Int;
15285
15286 statement("for (int i = 0; i < int(", array_expr, "); i++)");
15287 begin_scope();
15288 statement(to_expression(target_id), "[i] = ",
15289 bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")),
15290 ";");
15291 end_scope();
15292
15293 return true;
15294 }
15295
15296 void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
15297 {
15298 if (!backend.force_gl_in_out_block)
15299 return;
15300 // This path is only relevant for GL backends.
15301
15302 auto *var = maybe_get<SPIRVariable>(source_id);
15303 if (!var)
15304 return;
15305
15306 if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
15307 return;
15308
15309 auto &type = get_variable_data_type(*var);
15310 if (type.array.empty())
15311 return;
15312
15313 auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
15314 bool is_builtin = is_builtin_variable(*var) &&
15315 (builtin == BuiltInPointSize ||
15316 builtin == BuiltInPosition ||
15317 builtin == BuiltInSampleMask);
15318 bool is_tess = is_tessellation_shader();
15319 bool is_patch = has_decoration(var->self, DecorationPatch);
15320 bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;
15321
15322 // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it.
15323 // We must unroll the array load.
15324 // For builtins, we couldn't catch this case normally,
15325 // because this is resolved in the OpAccessChain in most cases.
15326 // If we load the entire array, we have no choice but to unroll here.
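// Rough sketch of the unrolled load for e.g. gl_Position as a tessellation input
// (names are illustrative, N is the declared array size or a spec constant expression):
//   vec4 _25_unrolled[N];
//   for (int i = 0; i < int(N); i++)
//   {
//       _25_unrolled[i] = gl_in[i].gl_Position;
//   }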
15327 if (!is_patch && (is_builtin || is_tess))
15328 {
15329 auto new_expr = join("_", target_id, "_unrolled");
15330 statement(variable_decl(type, new_expr, target_id), ";");
15331 string array_expr;
15332 if (type.array_size_literal.back())
15333 {
15334 array_expr = convert_to_string(type.array.back());
15335 if (type.array.back() == 0)
15336 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
15337 }
15338 else
15339 array_expr = to_expression(type.array.back());
15340
15341 // The array size might be a specialization constant, so use a for-loop instead.
15342 statement("for (int i = 0; i < int(", array_expr, "); i++)");
15343 begin_scope();
15344 if (is_builtin && !is_sample_mask)
15345 statement(new_expr, "[i] = gl_in[i].", expr, ";");
15346 else if (is_sample_mask)
15347 {
15348 SPIRType target_type;
15349 target_type.basetype = SPIRType::Int;
15350 statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";");
15351 }
15352 else
15353 statement(new_expr, "[i] = ", expr, "[i];");
15354 end_scope();
15355
15356 expr = move(new_expr);
15357 }
15358 }
15359
15360 void CompilerGLSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
15361 {
15362 // We will handle array cases elsewhere.
15363 if (!expr_type.array.empty())
15364 return;
15365
15366 auto *var = maybe_get_backing_variable(source_id);
15367 if (var)
15368 source_id = var->self;
15369
15370 // Only interested in standalone builtin variables.
15371 if (!has_decoration(source_id, DecorationBuiltIn))
15372 return;
15373
15374 auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
15375 auto expected_type = expr_type.basetype;
15376
15377 // TODO: Fill in for more builtins.
15378 switch (builtin)
15379 {
15380 case BuiltInLayer:
15381 case BuiltInPrimitiveId:
15382 case BuiltInViewportIndex:
15383 case BuiltInInstanceId:
15384 case BuiltInInstanceIndex:
15385 case BuiltInVertexId:
15386 case BuiltInVertexIndex:
15387 case BuiltInSampleId:
15388 case BuiltInBaseVertex:
15389 case BuiltInBaseInstance:
15390 case BuiltInDrawIndex:
15391 case BuiltInFragStencilRefEXT:
15392 case BuiltInInstanceCustomIndexNV:
15393 case BuiltInSampleMask:
15394 case BuiltInPrimitiveShadingRateKHR:
15395 case BuiltInShadingRateKHR:
15396 expected_type = SPIRType::Int;
15397 break;
15398
15399 case BuiltInGlobalInvocationId:
15400 case BuiltInLocalInvocationId:
15401 case BuiltInWorkgroupId:
15402 case BuiltInLocalInvocationIndex:
15403 case BuiltInWorkgroupSize:
15404 case BuiltInNumWorkgroups:
15405 case BuiltInIncomingRayFlagsNV:
15406 case BuiltInLaunchIdNV:
15407 case BuiltInLaunchSizeNV:
15408 expected_type = SPIRType::UInt;
15409 break;
15410
15411 default:
15412 break;
15413 }
15414
15415 if (expected_type != expr_type.basetype)
15416 expr = bitcast_expression(expr_type, expected_type, expr);
15417 }
15418
15419 void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
15420 {
15421 auto *var = maybe_get_backing_variable(target_id);
15422 if (var)
15423 target_id = var->self;
15424
15425 // Only interested in standalone builtin variables.
15426 if (!has_decoration(target_id, DecorationBuiltIn))
15427 return;
15428
15429 auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
15430 auto expected_type = expr_type.basetype;
15431
15432 // TODO: Fill in for more builtins.
15433 switch (builtin)
15434 {
15435 case BuiltInLayer:
15436 case BuiltInPrimitiveId:
15437 case BuiltInViewportIndex:
15438 case BuiltInFragStencilRefEXT:
15439 case BuiltInSampleMask:
15440 case BuiltInPrimitiveShadingRateKHR:
15441 case BuiltInShadingRateKHR:
15442 expected_type = SPIRType::Int;
15443 break;
15444
15445 default:
15446 break;
15447 }
15448
15449 if (expected_type != expr_type.basetype)
15450 {
15451 auto type = expr_type;
15452 type.basetype = expected_type;
15453 expr = bitcast_expression(type, expr_type.basetype, expr);
15454 }
15455 }
15456
15457 void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
15458 {
15459 if (*backend.nonuniform_qualifier == '\0')
15460 return;
15461
15462 auto *var = maybe_get_backing_variable(ptr_id);
15463 if (!var)
15464 return;
15465
15466 if (var->storage != StorageClassUniformConstant &&
15467 var->storage != StorageClassStorageBuffer &&
15468 var->storage != StorageClassUniform)
15469 return;
15470
15471 auto &backing_type = get<SPIRType>(var->basetype);
15472 if (backing_type.array.empty())
15473 return;
15474
15475 // If we get here, we know we're accessing an arrayed resource which
15476 // might require nonuniform qualifier.
15477
15478 auto start_array_index = expr.find_first_of('[');
15479
15480 if (start_array_index == string::npos)
15481 return;
15482
15483 // We've opened a bracket, track expressions until we can close the bracket.
15484 // This must be our resource index.
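// E.g. for the GLSL backend, where the qualifier is "nonuniformEXT", an access like
// "textures[index]" is rewritten to roughly "textures[nonuniformEXT(index)]".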
15485 size_t end_array_index = string::npos;
15486 unsigned bracket_count = 1;
15487 for (size_t index = start_array_index + 1; index < expr.size(); index++)
15488 {
15489 if (expr[index] == ']')
15490 {
15491 if (--bracket_count == 0)
15492 {
15493 end_array_index = index;
15494 break;
15495 }
15496 }
15497 else if (expr[index] == '[')
15498 bracket_count++;
15499 }
15500
15501 assert(bracket_count == 0);
15502
15503 // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
15504 // nothing we can do here to express that.
15505 if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
15506 return;
15507
15508 start_array_index++;
15509
15510 expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
15511 expr.substr(start_array_index, end_array_index - start_array_index), ")",
15512 expr.substr(end_array_index, string::npos));
15513 }
15514
15515 void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
15516 {
15517 if ((options.es && options.version < 310) || (!options.es && options.version < 140))
15518 return;
15519
15520 switch (block.hint)
15521 {
15522 case SPIRBlock::HintFlatten:
15523 require_extension_internal("GL_EXT_control_flow_attributes");
15524 statement("SPIRV_CROSS_FLATTEN");
15525 break;
15526 case SPIRBlock::HintDontFlatten:
15527 require_extension_internal("GL_EXT_control_flow_attributes");
15528 statement("SPIRV_CROSS_BRANCH");
15529 break;
15530 case SPIRBlock::HintUnroll:
15531 require_extension_internal("GL_EXT_control_flow_attributes");
15532 statement("SPIRV_CROSS_UNROLL");
15533 break;
15534 case SPIRBlock::HintDontUnroll:
15535 require_extension_internal("GL_EXT_control_flow_attributes");
15536 statement("SPIRV_CROSS_LOOP");
15537 break;
15538 default:
15539 break;
15540 }
15541 }
15542
15543 void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
15544 {
15545 preserved_aliases[id] = get_name(id);
15546 }
15547
15548 void CompilerGLSL::reset_name_caches()
15549 {
15550 for (auto &preserved : preserved_aliases)
15551 set_name(preserved.first, preserved.second);
15552
15553 preserved_aliases.clear();
15554 resource_names.clear();
15555 block_input_names.clear();
15556 block_output_names.clear();
15557 block_ubo_names.clear();
15558 block_ssbo_names.clear();
15559 block_names.clear();
15560 function_overloads.clear();
15561 }
15562
15563 void CompilerGLSL::fixup_type_alias()
15564 {
15565 // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
15566 ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
15567 if (!type.type_alias)
15568 return;
15569
15570 if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
15571 {
15572 // Top-level block types should never alias anything else.
15573 type.type_alias = 0;
15574 }
15575 else if (type_is_block_like(type) && type.self == ID(self))
15576 {
15577 // A block-like type is any type which contains Offset decoration, but not top-level blocks,
15578 // i.e. blocks which are placed inside buffers.
15579 // Become the master.
15580 ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
15581 if (other_id == self)
15582 return;
15583
15584 if (other_type.type_alias == type.type_alias)
15585 other_type.type_alias = self;
15586 });
15587
15588 this->get<SPIRType>(type.type_alias).type_alias = self;
15589 type.type_alias = 0;
15590 }
15591 });
15592 }
15593
15594 void CompilerGLSL::reorder_type_alias()
15595 {
15596 // Reorder declaration of types so that the master of the type alias is always emitted first.
15597 // We need this in case a type B depends on type A (A must come before B in the vector), but A is an alias of a type ABuffer, which
15598 // means the declaration of A doesn't happen (yet), and the order would be B, ABuffer and not ABuffer, B. Fix this up here.
15599 auto loop_lock = ir.create_loop_hard_lock();
15600
15601 auto &type_ids = ir.ids_for_type[TypeType];
15602 for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
15603 {
15604 auto &type = get<SPIRType>(*alias_itr);
15605 if (type.type_alias != TypeID(0) &&
15606 !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
15607 {
15608 // We will skip declaring this type, so make sure the type_alias type comes before.
15609 auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
15610 assert(master_itr != end(type_ids));
15611
15612 if (alias_itr < master_itr)
15613 {
15614 // Must also swap the type order for the constant-type joined array.
15615 auto &joined_types = ir.ids_for_constant_or_type;
15616 auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
15617 auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
15618 assert(alt_alias_itr != end(joined_types));
15619 assert(alt_master_itr != end(joined_types));
15620
15621 swap(*alias_itr, *master_itr);
15622 swap(*alt_alias_itr, *alt_master_itr);
15623 }
15624 }
15625 }
15626 }
15627
15628 void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
15629 {
15630 // If we are redirecting statements, ignore the line directive.
15631 // Common case here is continue blocks.
15632 if (redirect_statement)
15633 return;
15634
15635 if (options.emit_line_directives)
15636 {
15637 require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
15638 statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
15639 }
15640 }
15641
15642 void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
15643 SmallVector<uint32_t> chain)
15644 {
15645 // Fully unroll all member/array indices one by one.
15646
15647 auto &lhs_type = get<SPIRType>(lhs_type_id);
15648 auto &rhs_type = get<SPIRType>(rhs_type_id);
15649
15650 if (!lhs_type.array.empty())
15651 {
15652 // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
15653 // and this is a rather obscure opcode anyways, keep it simple unless we are forced to.
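        // Illustrative unroll: copying a T lhs[2] recurses once per element (chain index 0, then 1),
        // and each recursion may descend further into struct members before the leaf stores are emitted.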
        uint32_t array_size = to_array_size_literal(lhs_type);
        chain.push_back(0);

        for (uint32_t i = 0; i < array_size; i++)
        {
            chain.back() = i;
            emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
        }
    }
    else if (lhs_type.basetype == SPIRType::Struct)
    {
        chain.push_back(0);
        uint32_t member_count = uint32_t(lhs_type.member_types.size());
        for (uint32_t i = 0; i < member_count; i++)
        {
            chain.back() = i;
            emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
        }
    }
    else
    {
        // Need to handle unpack/packing fixups since this can differ wildly between the logical types,
        // particularly in MSL.
        // To deal with this, we emit access chains and go through emit_store_statement
        // to deal with all the special cases we can encounter.
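        // For example (illustrative), in MSL one side of the copy may use a packed physical type
        // (e.g. packed_float3) while the other side does not; emit_store_statement inserts the
        // required unpack/pack conversions based on the extended decorations set below.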

        AccessChainMeta lhs_meta, rhs_meta;
        auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
                                         ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
        auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
                                         ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);

        uint32_t id = ir.increase_bound_by(2);
        lhs_id = id;
        rhs_id = id + 1;

        {
            auto &lhs_expr = set<SPIRExpression>(lhs_id, move(lhs), lhs_type_id, true);
            lhs_expr.need_transpose = lhs_meta.need_transpose;

            if (lhs_meta.storage_is_packed)
                set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
            if (lhs_meta.storage_physical_type != 0)
                set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);

            forwarded_temporaries.insert(lhs_id);
            suppressed_usage_tracking.insert(lhs_id);
        }

        {
            auto &rhs_expr = set<SPIRExpression>(rhs_id, move(rhs), rhs_type_id, true);
            rhs_expr.need_transpose = rhs_meta.need_transpose;

            if (rhs_meta.storage_is_packed)
                set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
            if (rhs_meta.storage_physical_type != 0)
                set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);

            forwarded_temporaries.insert(rhs_id);
            suppressed_usage_tracking.insert(rhs_id);
        }

        emit_store_statement(lhs_id, rhs_id);
    }
}

bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
{
    if (!has_decoration(id, DecorationInputAttachmentIndex))
        return false;

    uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
    for (auto &remap : subpass_to_framebuffer_fetch_attachment)
        if (remap.first == input_attachment_index)
            return true;

    return false;
}

const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
{
    const SPIRVariable *ret = nullptr;
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
        if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
            get_decoration(var.self, DecorationInputAttachmentIndex) == index)
        {
            ret = &var;
        }
    });
    return ret;
}

const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
{
    const SPIRVariable *ret = nullptr;
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
        if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
            ret = &var;
    });
    return ret;
}

void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
{
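    // For every remapped attachment we inject a copy at the top of the entry point. Illustrative
    // output (variable names hypothetical):
    //   spvSubpassInput0 = gl_LastFragData[0];   // legacy targets
    //   spvSubpassInput0.xyzw = FragColor;       // framebuffer-fetch path reading the inout output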
    for (auto &remap : subpass_to_framebuffer_fetch_attachment)
    {
        auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
        auto *output_var = find_color_output_by_location(remap.second);
        if (!subpass_var)
            continue;
        if (!output_var)
            SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
                              "to read from it.");
        if (is_array(get<SPIRType>(output_var->basetype)))
            SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");

        auto &func = get<SPIRFunction>(get_entry_point().self);
        func.fixup_hooks_in.push_back([=]() {
            if (is_legacy())
            {
                statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
                          get_decoration(output_var->self, DecorationLocation), "];");
            }
            else
            {
                uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
                statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
                          to_expression(output_var->self), ";");
            }
        });
    }
}

bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
{
    return image_is_comparison(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
{
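    // Indexed by the Candidate enum, so the order of these strings must match it exactly.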
    static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
                                                        "GL_KHR_shader_subgroup_basic",
                                                        "GL_KHR_shader_subgroup_vote",
                                                        "GL_NV_gpu_shader5",
                                                        "GL_NV_shader_thread_group",
                                                        "GL_NV_shader_thread_shuffle",
                                                        "GL_ARB_shader_ballot",
                                                        "GL_ARB_shader_group_vote",
                                                        "GL_AMD_gcn_shader" };
    return retval[c];
}

SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
{
    switch (c)
    {
    case ARB_shader_ballot:
        return { "GL_ARB_shader_int64" };
    case AMD_gcn_shader:
        return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
    default:
        return {};
    }
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
{
    switch (c)
    {
    case ARB_shader_ballot:
        return "defined(GL_ARB_shader_int64)";
    case AMD_gcn_shader:
        return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
    default:
        return "";
    }
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_feature_dependencies(Feature feature)
{
    switch (feature)
    {
    case SubgroupAllEqualT:
        return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
    case SubgroupElect:
        return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
    case SubgroupInverseBallot_InclBitCount_ExclBitCout:
        return { SubgroupMask };
    case SubgroupBallotBitCount:
        return { SubgroupBallot };
    default:
        return {};
    }
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
    get_feature_dependency_mask(Feature feature)
{
    return build_mask(get_feature_dependencies(feature));
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
{
    static const bool retval[FeatureCount] = { false, false, false, false, false, false,
                                               true, // SubgroupBallotFindLSB_MSB
                                               false, false, false, false,
                                               true, // SubgroupMemBarrier - replaced with workgroup memory barriers
                                               false, false, true, false };

    return retval[feature];
}

CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
    get_KHR_extension_for_feature(Feature feature)
{
    static const Candidate extensions[FeatureCount] = {
        KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
        KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
        KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
        KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
    };

    return extensions[feature];
}

void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
{
    feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
{
    return (feature_mask & (1u << feature)) != 0;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
{
    Result res;

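    // Weight each candidate extension by how many of the requested features (and their dependencies)
    // it could help implement. get_candidates_for_feature(ft, result) later sorts candidates by these
    // weights, so a single extension that covers many requested features wins; the KHR subgroup
    // extensions additionally start with a large bias (see Result::Result()).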
    for (uint32_t i = 0u; i < FeatureCount; ++i)
    {
        if (feature_mask & (1u << i))
        {
            auto feature = static_cast<Feature>(i);
            std::unordered_set<uint32_t> unique_candidates;

            auto candidates = get_candidates_for_feature(feature);
            unique_candidates.insert(candidates.begin(), candidates.end());

            auto deps = get_feature_dependencies(feature);
            for (Feature d : deps)
            {
                candidates = get_candidates_for_feature(d);
                if (!candidates.empty())
                    unique_candidates.insert(candidates.begin(), candidates.end());
            }

            for (uint32_t c : unique_candidates)
                ++res.weights[static_cast<Candidate>(c)];
        }
    }

    return res;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_candidates_for_feature(Feature ft, const Result &r)
{
    auto c = get_candidates_for_feature(ft);
    auto cmp = [&r](Candidate a, Candidate b) {
        if (r.weights[a] == r.weights[b])
            return a < b; // Prefer candidates with lower enum value
        return r.weights[a] > r.weights[b];
    };
    std::sort(c.begin(), c.end(), cmp);
    return c;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_candidates_for_feature(Feature feature)
{
    switch (feature)
    {
    case SubgroupMask:
        return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
    case SubgroupSize:
        return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
    case SubgroupInvocationID:
        return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
    case SubgroupID:
        return { KHR_shader_subgroup_basic, NV_shader_thread_group };
    case NumSubgroups:
        return { KHR_shader_subgroup_basic, NV_shader_thread_group };
    case SubgroupBroadcast_First:
        return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
    case SubgroupBallotFindLSB_MSB:
        return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
    case SubgroupAll_Any_AllEqualBool:
        return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
    case SubgroupAllEqualT:
        return {}; // depends on other features only
    case SubgroupElect:
        return {}; // depends on other features only
    case SubgroupBallot:
        return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
    case SubgroupBarrier:
        return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
    case SubgroupMemBarrier:
        return { KHR_shader_subgroup_basic };
    case SubgroupInverseBallot_InclBitCount_ExclBitCout:
        return {};
    case SubgroupBallotBitExtract:
        return { NV_shader_thread_group };
    case SubgroupBallotBitCount:
        return {};
    default:
        return {};
    }
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
    const SmallVector<Feature> &features)
{
    FeatureMask mask = 0;
    for (Feature f : features)
        mask |= FeatureMask(1) << f;
    return mask;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
{
    for (auto &weight : weights)
        weight = 0;

    // Make sure KHR_shader_subgroup extensions are always preferred.
    const uint32_t big_num = FeatureCount;
    weights[KHR_shader_subgroup_ballot] = big_num;
    weights[KHR_shader_subgroup_basic] = big_num;
    weights[KHR_shader_subgroup_vote] = big_num;
}

void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
{
    // Must be ordered to maintain deterministic output, so vector is appropriate.
    if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
        end(workaround_ubo_load_overload_types))
    {
        force_recompile();
        workaround_ubo_load_overload_types.push_back(id);
    }
}

void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
{
    // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
    // To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
    // ensure the row_major decoration is actually respected.
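    // Illustrative rewrite (member name hypothetical): a UBO load "UBO.row_major_mvp" becomes
    // "spvWorkaroundRowMajor(UBO.row_major_mvp)", with the wrapper overload requested below.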
    auto *var = maybe_get_backing_variable(ptr);
    if (!var)
        return;

    auto &backing_type = get<SPIRType>(var->basetype);
    bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
                  has_decoration(backing_type.self, DecorationBlock);
    if (!is_ubo)
        return;

    auto *type = &get<SPIRType>(loaded_type);
    bool rewrite = false;

    if (is_matrix(*type))
    {
        // To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
        // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
        // If there is any row-major action going on, we apply the workaround.
        // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
        // If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need the workaround.
        type = &backing_type;
    }

    if (type->basetype == SPIRType::Struct)
    {
        // If we're loading a struct where any member is a row-major matrix, apply the workaround.
        for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
        {
            if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
            {
                rewrite = true;
                break;
            }
        }
    }

    if (rewrite)
    {
        request_workaround_wrapper_overload(loaded_type);
        expr = join("spvWorkaroundRowMajor(", expr, ")");
    }
}

void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
{
    masked_output_locations.insert({ location, component });
}

void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
{
    masked_output_builtins.insert(builtin);
}

bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
{
    auto &type = get<SPIRType>(var.basetype);
    bool is_block = has_decoration(type.self, DecorationBlock);
    // Blocks by themselves are never masked. Must be masked per-member.
    if (is_block)
        return false;

    bool is_builtin = has_decoration(var.self, DecorationBuiltIn);

    if (is_builtin)
    {
        return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn)));
    }
    else
    {
        if (!has_decoration(var.self, DecorationLocation))
            return false;

        return is_stage_output_location_masked(
            get_decoration(var.self, DecorationLocation),
            get_decoration(var.self, DecorationComponent));
    }
}

bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
{
    auto &type = get<SPIRType>(var.basetype);
    bool is_block = has_decoration(type.self, DecorationBlock);
    if (!is_block)
        return false;

    BuiltIn builtin = BuiltInMax;
    if (is_member_builtin(type, index, &builtin))
    {
        return is_stage_output_builtin_masked(builtin);
    }
    else
    {
        uint32_t location = get_declared_member_location(var, index, strip_array);
        uint32_t component = get_member_decoration(type.self, index, DecorationComponent);
        return is_stage_output_location_masked(location, component);
    }
}

bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
{
    return masked_output_locations.count({ location, component }) != 0;
}

bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const
{
    return masked_output_builtins.count(builtin) != 0;
}

uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
    auto &block_type = get<SPIRType>(var.basetype);
    if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation))
        return get_member_decoration(block_type.self, mbr_idx, DecorationLocation);
    else
        return get_accumulated_member_location(var, mbr_idx, strip_array);
}

uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
    auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
    uint32_t location = get_decoration(var.self, DecorationLocation);

    for (uint32_t i = 0; i < mbr_idx; i++)
    {
        auto &mbr_type = get<SPIRType>(type.member_types[i]);

        // Start counting from any place we have a new location decoration.
        if (has_member_decoration(type.self, i, DecorationLocation))
            location = get_member_decoration(type.self, i, DecorationLocation);

        uint32_t location_count = type_to_location_count(mbr_type);
        location += location_count;
    }

    return location;
}

StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
{
    auto *var = maybe_get_backing_variable(ptr);

    // If the expression has been lowered to a temporary, we need to use the Generic storage class.
    // We're looking for the effective storage class of a given expression.
    // An access chain or forwarded OpLoads from such access chains
    // will generally have the storage class of the underlying variable, but if the load was not forwarded
    // we have lost any address space qualifiers.
    bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
                            (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);

    if (var && !forced_temporary)
    {
        if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
            return StorageClassWorkgroup;
        if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
            return StorageClassStorageBuffer;

        // Normalize SSBOs to StorageBuffer here.
        if (var->storage == StorageClassUniform &&
            has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
            return StorageClassStorageBuffer;
        else
            return var->storage;
    }
    else
        return expression_type(ptr).storage;
}

uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
{
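    // Location counting as implemented here (illustrative): scalars and vectors take one location,
    // a matrix takes one location per column, arrays multiply by their total element count, and a
    // struct is the sum of its members.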
    uint32_t count;
    if (type.basetype == SPIRType::Struct)
    {
        uint32_t mbr_count = uint32_t(type.member_types.size());
        count = 0;
        for (uint32_t i = 0; i < mbr_count; i++)
            count += type_to_location_count(get<SPIRType>(type.member_types[i]));
    }
    else
    {
        count = type.columns > 1 ? type.columns : 1;
    }

    uint32_t dim_count = uint32_t(type.array.size());
    for (uint32_t i = 0; i < dim_count; i++)
        count *= to_array_size_literal(type, i);

    return count;
}
