1 /*
2  * Copyright 2015-2021 Arm Limited
3  * SPDX-License-Identifier: Apache-2.0 OR MIT
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * At your option, you may choose to accept this material under either:
20  *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
21  *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
22  */
23 
24 #include "spirv_glsl.hpp"
25 #include "GLSL.std.450.h"
26 #include "spirv_common.hpp"
27 #include <algorithm>
28 #include <assert.h>
29 #include <cmath>
30 #include <limits>
31 #include <locale.h>
32 #include <utility>
33 
34 #ifndef _WIN32
35 #include <langinfo.h>
36 #endif
37 #include <locale.h>
38 
39 using namespace spv;
40 using namespace SPIRV_CROSS_NAMESPACE;
41 using namespace std;
42 
43 enum ExtraSubExpressionType
44 {
45 	// Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
46 	EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
47 	EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
48 };
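// Illustrative note (an assumption, not part of the original comments): an ID is typically OR'ed
// with one of the masks above before being used as a key, e.g.
//   uint32_t key = id | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET;
// so stream-offset and auxiliary sub-expressions derived from the same ID cannot collide in the map.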
49 
50 static bool is_unsigned_opcode(Op op)
51 {
52 	// Don't have to be exhaustive, only relevant for legacy target checking ...
53 	switch (op)
54 	{
55 	case OpShiftRightLogical:
56 	case OpUGreaterThan:
57 	case OpUGreaterThanEqual:
58 	case OpULessThan:
59 	case OpULessThanEqual:
60 	case OpUConvert:
61 	case OpUDiv:
62 	case OpUMod:
63 	case OpUMulExtended:
64 	case OpConvertUToF:
65 	case OpConvertFToU:
66 		return true;
67 
68 	default:
69 		return false;
70 	}
71 }
72 
73 static bool is_unsigned_glsl_opcode(GLSLstd450 op)
74 {
75 	// Don't have to be exhaustive, only relevant for legacy target checking ...
76 	switch (op)
77 	{
78 	case GLSLstd450UClamp:
79 	case GLSLstd450UMin:
80 	case GLSLstd450UMax:
81 	case GLSLstd450FindUMsb:
82 		return true;
83 
84 	default:
85 		return false;
86 	}
87 }
88 
89 static bool packing_is_vec4_padded(BufferPackingStandard packing)
90 {
91 	switch (packing)
92 	{
93 	case BufferPackingHLSLCbuffer:
94 	case BufferPackingHLSLCbufferPackOffset:
95 	case BufferPackingStd140:
96 	case BufferPackingStd140EnhancedLayout:
97 		return true;
98 
99 	default:
100 		return false;
101 	}
102 }
103 
104 static bool packing_is_hlsl(BufferPackingStandard packing)
105 {
106 	switch (packing)
107 	{
108 	case BufferPackingHLSLCbuffer:
109 	case BufferPackingHLSLCbufferPackOffset:
110 		return true;
111 
112 	default:
113 		return false;
114 	}
115 }
116 
117 static bool packing_has_flexible_offset(BufferPackingStandard packing)
118 {
119 	switch (packing)
120 	{
121 	case BufferPackingStd140:
122 	case BufferPackingStd430:
123 	case BufferPackingScalar:
124 	case BufferPackingHLSLCbuffer:
125 		return false;
126 
127 	default:
128 		return true;
129 	}
130 }
131 
132 static bool packing_is_scalar(BufferPackingStandard packing)
133 {
134 	switch (packing)
135 	{
136 	case BufferPackingScalar:
137 	case BufferPackingScalarEnhancedLayout:
138 		return true;
139 
140 	default:
141 		return false;
142 	}
143 }
144 
145 static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
146 {
147 	switch (packing)
148 	{
149 	case BufferPackingStd140EnhancedLayout:
150 		return BufferPackingStd140;
151 	case BufferPackingStd430EnhancedLayout:
152 		return BufferPackingStd430;
153 	case BufferPackingHLSLCbufferPackOffset:
154 		return BufferPackingHLSLCbuffer;
155 	case BufferPackingScalarEnhancedLayout:
156 		return BufferPackingScalar;
157 	default:
158 		return packing;
159 	}
160 }
161 
162 void CompilerGLSL::init()
163 {
164 	if (ir.source.known)
165 	{
166 		options.es = ir.source.es;
167 		options.version = ir.source.version;
168 	}
169 
170 	// Query the locale to see what the decimal point is.
171 	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
172 	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
173 	// tricky.
174 #ifdef _WIN32
175 	// On Windows, localeconv uses thread-local storage, so it should be fine.
176 	const struct lconv *conv = localeconv();
177 	if (conv && conv->decimal_point)
178 		current_locale_radix_character = *conv->decimal_point;
179 #elif defined(__ANDROID__) && __ANDROID_API__ < 26
180 	// nl_langinfo is not supported on this platform, fall back to the worse alternative.
181 	const struct lconv *conv = localeconv();
182 	if (conv && conv->decimal_point)
183 		current_locale_radix_character = *conv->decimal_point;
184 #else
185 	// localeconv, the portable function, is not MT safe ...
186 	const char *decimal_point = nl_langinfo(RADIXCHAR);
187 	if (decimal_point && *decimal_point != '\0')
188 		current_locale_radix_character = *decimal_point;
189 #endif
190 }
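// For example (illustrative): in a locale such as de_DE, localeconv()->decimal_point is ",", so a
// naively printed float literal would come out as "0,5", which is not valid GLSL. Recording the
// radix character here lets later float-to-string conversion patch such literals back to "0.5".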
191 
192 static const char *to_pls_layout(PlsFormat format)
193 {
194 	switch (format)
195 	{
196 	case PlsR11FG11FB10F:
197 		return "layout(r11f_g11f_b10f) ";
198 	case PlsR32F:
199 		return "layout(r32f) ";
200 	case PlsRG16F:
201 		return "layout(rg16f) ";
202 	case PlsRGB10A2:
203 		return "layout(rgb10_a2) ";
204 	case PlsRGBA8:
205 		return "layout(rgba8) ";
206 	case PlsRG16:
207 		return "layout(rg16) ";
208 	case PlsRGBA8I:
209 		return "layout(rgba8i)";
210 	case PlsRG16I:
211 		return "layout(rg16i) ";
212 	case PlsRGB10A2UI:
213 		return "layout(rgb10_a2ui) ";
214 	case PlsRGBA8UI:
215 		return "layout(rgba8ui) ";
216 	case PlsRG16UI:
217 		return "layout(rg16ui) ";
218 	case PlsR32UI:
219 		return "layout(r32ui) ";
220 	default:
221 		return "";
222 	}
223 }
224 
225 static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
226 {
227 	switch (format)
228 	{
229 	default:
230 	case PlsR11FG11FB10F:
231 	case PlsR32F:
232 	case PlsRG16F:
233 	case PlsRGB10A2:
234 	case PlsRGBA8:
235 	case PlsRG16:
236 		return SPIRType::Float;
237 
238 	case PlsRGBA8I:
239 	case PlsRG16I:
240 		return SPIRType::Int;
241 
242 	case PlsRGB10A2UI:
243 	case PlsRGBA8UI:
244 	case PlsRG16UI:
245 	case PlsR32UI:
246 		return SPIRType::UInt;
247 	}
248 }
249 
250 static uint32_t pls_format_to_components(PlsFormat format)
251 {
252 	switch (format)
253 	{
254 	default:
255 	case PlsR32F:
256 	case PlsR32UI:
257 		return 1;
258 
259 	case PlsRG16F:
260 	case PlsRG16:
261 	case PlsRG16UI:
262 	case PlsRG16I:
263 		return 2;
264 
265 	case PlsR11FG11FB10F:
266 		return 3;
267 
268 	case PlsRGB10A2:
269 	case PlsRGBA8:
270 	case PlsRGBA8I:
271 	case PlsRGB10A2UI:
272 	case PlsRGBA8UI:
273 		return 4;
274 	}
275 }
276 
277 const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
278 {
279 	static const char *const swizzle[4][4] = {
280 		{ ".x", ".y", ".z", ".w" },
281 		{ ".xy", ".yz", ".zw", nullptr },
282 		{ ".xyz", ".yzw", nullptr, nullptr },
283 #if defined(__GNUC__) && (__GNUC__ == 9)
284 		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
285 		// This array ends up being compiled as all nullptrs, tripping the assertions below.
286 		{ "", nullptr, nullptr, "$" },
287 #else
288 		{ "", nullptr, nullptr, nullptr },
289 #endif
290 	};
291 
292 	assert(vecsize >= 1 && vecsize <= 4);
293 	assert(index >= 0 && index < 4);
294 	assert(swizzle[vecsize - 1][index]);
295 
296 	return swizzle[vecsize - 1][index];
297 }
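// Usage sketch (matching the table above): vector_swizzle(1, 2) yields ".z" and
// vector_swizzle(3, 1) yields ".yzw"; vecsize selects the row, index the starting component.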
298 
299 void CompilerGLSL::reset()
300 {
301 	// We do some speculative optimizations which should pretty much always work out,
302 	// but just in case the SPIR-V is rather weird, recompile until it's happy.
303 	// This typically only means one extra pass.
304 	clear_force_recompile();
305 
306 	// Clear invalid expression tracking.
307 	invalid_expressions.clear();
308 	current_function = nullptr;
309 
310 	// Clear temporary usage tracking.
311 	expression_usage_counts.clear();
312 	forwarded_temporaries.clear();
313 	suppressed_usage_tracking.clear();
314 
315 	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
316 	flushed_phi_variables.clear();
317 
318 	reset_name_caches();
319 
320 	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
321 		func.active = false;
322 		func.flush_undeclared = true;
323 	});
324 
325 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });
326 
327 	ir.reset_all_of_type<SPIRExpression>();
328 	ir.reset_all_of_type<SPIRAccessChain>();
329 
330 	statement_count = 0;
331 	indent = 0;
332 	current_loop_level = 0;
333 }
334 
335 void CompilerGLSL::remap_pls_variables()
336 {
337 	for (auto &input : pls_inputs)
338 	{
339 		auto &var = get<SPIRVariable>(input.id);
340 
341 		bool input_is_target = false;
342 		if (var.storage == StorageClassUniformConstant)
343 		{
344 			auto &type = get<SPIRType>(var.basetype);
345 			input_is_target = type.image.dim == DimSubpassData;
346 		}
347 
348 		if (var.storage != StorageClassInput && !input_is_target)
349 			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
350 		var.remapped_variable = true;
351 	}
352 
353 	for (auto &output : pls_outputs)
354 	{
355 		auto &var = get<SPIRVariable>(output.id);
356 		if (var.storage != StorageClassOutput)
357 			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
358 		var.remapped_variable = true;
359 	}
360 }
361 
362 void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
363 {
364 	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
365 	inout_color_attachments.push_back({ color_location, coherent });
366 }
367 
368 bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
369 {
370 	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
371 	                    [&](const std::pair<uint32_t, bool> &elem) {
372 		                    return elem.first == location;
373 	                    }) != end(inout_color_attachments);
374 }
375 
376 bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
377 {
378 	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
379 	                    [&](const std::pair<uint32_t, bool> &elem) {
380 		                    return elem.first == location && !elem.second;
381 	                    }) != end(inout_color_attachments);
382 }
383 
384 void CompilerGLSL::find_static_extensions()
385 {
386 	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
387 		if (type.basetype == SPIRType::Double)
388 		{
389 			if (options.es)
390 				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
391 			if (!options.es && options.version < 400)
392 				require_extension_internal("GL_ARB_gpu_shader_fp64");
393 		}
394 		else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
395 		{
396 			if (options.es)
397 				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
398 			if (!options.es)
399 				require_extension_internal("GL_ARB_gpu_shader_int64");
400 		}
401 		else if (type.basetype == SPIRType::Half)
402 		{
403 			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
404 			if (options.vulkan_semantics)
405 				require_extension_internal("GL_EXT_shader_16bit_storage");
406 		}
407 		else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
408 		{
409 			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
410 			if (options.vulkan_semantics)
411 				require_extension_internal("GL_EXT_shader_8bit_storage");
412 		}
413 		else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
414 		{
415 			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
416 			if (options.vulkan_semantics)
417 				require_extension_internal("GL_EXT_shader_16bit_storage");
418 		}
419 	});
420 
421 	auto &execution = get_entry_point();
422 	switch (execution.model)
423 	{
424 	case ExecutionModelGLCompute:
425 		if (!options.es && options.version < 430)
426 			require_extension_internal("GL_ARB_compute_shader");
427 		if (options.es && options.version < 310)
428 			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
429 		break;
430 
431 	case ExecutionModelGeometry:
432 		if (options.es && options.version < 320)
433 			require_extension_internal("GL_EXT_geometry_shader");
434 		if (!options.es && options.version < 150)
435 			require_extension_internal("GL_ARB_geometry_shader4");
436 
437 		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
438 		{
439 			// Instanced GS is part of 400 core or this extension.
440 			if (!options.es && options.version < 400)
441 				require_extension_internal("GL_ARB_gpu_shader5");
442 		}
443 		break;
444 
445 	case ExecutionModelTessellationEvaluation:
446 	case ExecutionModelTessellationControl:
447 		if (options.es && options.version < 320)
448 			require_extension_internal("GL_EXT_tessellation_shader");
449 		if (!options.es && options.version < 400)
450 			require_extension_internal("GL_ARB_tessellation_shader");
451 		break;
452 
453 	case ExecutionModelRayGenerationKHR:
454 	case ExecutionModelIntersectionKHR:
455 	case ExecutionModelAnyHitKHR:
456 	case ExecutionModelClosestHitKHR:
457 	case ExecutionModelMissKHR:
458 	case ExecutionModelCallableKHR:
459 		// NV enums are aliases.
460 		if (options.es || options.version < 460)
461 			SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
462 		if (!options.vulkan_semantics)
463 			SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");
464 
465 		// Need to figure out if we should target KHR or NV extension based on capabilities.
466 		for (auto &cap : ir.declared_capabilities)
467 		{
468 			if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
469 			    cap == CapabilityRayTraversalPrimitiveCullingKHR)
470 			{
471 				ray_tracing_is_khr = true;
472 				break;
473 			}
474 		}
475 
476 		if (ray_tracing_is_khr)
477 		{
478 			// In KHR ray tracing we pass payloads by pointer instead of location,
479 			// so make sure we assign locations properly.
480 			ray_tracing_khr_fixup_locations();
481 			require_extension_internal("GL_EXT_ray_tracing");
482 		}
483 		else
484 			require_extension_internal("GL_NV_ray_tracing");
485 		break;
486 
487 	default:
488 		break;
489 	}
490 
491 	if (!pls_inputs.empty() || !pls_outputs.empty())
492 	{
493 		if (execution.model != ExecutionModelFragment)
494 			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
495 		require_extension_internal("GL_EXT_shader_pixel_local_storage");
496 	}
497 
498 	if (!inout_color_attachments.empty())
499 	{
500 		if (execution.model != ExecutionModelFragment)
501 			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
502 		if (options.vulkan_semantics)
503 			SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");
504 
505 		bool has_coherent = false;
506 		bool has_incoherent = false;
507 
508 		for (auto &att : inout_color_attachments)
509 		{
510 			if (att.second)
511 				has_coherent = true;
512 			else
513 				has_incoherent = true;
514 		}
515 
516 		if (has_coherent)
517 			require_extension_internal("GL_EXT_shader_framebuffer_fetch");
518 		if (has_incoherent)
519 			require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent");
520 	}
521 
522 	if (options.separate_shader_objects && !options.es && options.version < 410)
523 		require_extension_internal("GL_ARB_separate_shader_objects");
524 
525 	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
526 	{
527 		if (!options.vulkan_semantics)
528 			SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
529 		if (options.es && options.version < 320)
530 			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
531 		else if (!options.es && options.version < 450)
532 			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
533 		require_extension_internal("GL_EXT_buffer_reference");
534 	}
535 	else if (ir.addressing_model != AddressingModelLogical)
536 	{
537 		SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
538 	}
539 
540 	// Check for nonuniform qualifier and passthrough.
541 	// Instead of looping over all decorations to find this, just look at capabilities.
542 	for (auto &cap : ir.declared_capabilities)
543 	{
544 		switch (cap)
545 		{
546 		case CapabilityShaderNonUniformEXT:
547 			if (!options.vulkan_semantics)
548 				require_extension_internal("GL_NV_gpu_shader5");
549 			else
550 				require_extension_internal("GL_EXT_nonuniform_qualifier");
551 			break;
552 		case CapabilityRuntimeDescriptorArrayEXT:
553 			if (!options.vulkan_semantics)
554 				SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
555 			require_extension_internal("GL_EXT_nonuniform_qualifier");
556 			break;
557 
558 		case CapabilityGeometryShaderPassthroughNV:
559 			if (execution.model == ExecutionModelGeometry)
560 			{
561 				require_extension_internal("GL_NV_geometry_shader_passthrough");
562 				execution.geometry_passthrough = true;
563 			}
564 			break;
565 
566 		case CapabilityVariablePointers:
567 		case CapabilityVariablePointersStorageBuffer:
568 			SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");
569 
570 		case CapabilityMultiView:
571 			if (options.vulkan_semantics)
572 				require_extension_internal("GL_EXT_multiview");
573 			else
574 			{
575 				require_extension_internal("GL_OVR_multiview2");
576 				if (options.ovr_multiview_view_count == 0)
577 					SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
578 				if (get_execution_model() != ExecutionModelVertex)
579 					SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
580 			}
581 			break;
582 
583 		case CapabilityRayQueryKHR:
584 			if (options.es || options.version < 460 || !options.vulkan_semantics)
585 				SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
586 			require_extension_internal("GL_EXT_ray_query");
587 			ray_tracing_is_khr = true;
588 			break;
589 
590 		case CapabilityRayTraversalPrimitiveCullingKHR:
591 			if (options.es || options.version < 460 || !options.vulkan_semantics)
592 				SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
593 			require_extension_internal("GL_EXT_ray_flags_primitive_culling");
594 			ray_tracing_is_khr = true;
595 			break;
596 
597 		default:
598 			break;
599 		}
600 	}
601 
602 	if (options.ovr_multiview_view_count)
603 	{
604 		if (options.vulkan_semantics)
605 			SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
606 		if (get_execution_model() != ExecutionModelVertex)
607 			SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
608 		require_extension_internal("GL_OVR_multiview2");
609 	}
610 }
611 
612 void CompilerGLSL::ray_tracing_khr_fixup_locations()
613 {
614 	uint32_t location = 0;
615 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
616 		// Incoming payload storage can also be used for tracing.
617 		if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
618 		    var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
619 			return;
620 		if (is_hidden_variable(var))
621 			return;
622 		set_decoration(var.self, DecorationLocation, location++);
623 	});
624 }
625 
626 string CompilerGLSL::compile()
627 {
628 	ir.fixup_reserved_names();
629 
630 	if (!options.vulkan_semantics)
631 	{
632 		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
633 		backend.nonuniform_qualifier = "";
634 		backend.needs_row_major_load_workaround = true;
635 	}
636 	backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
637 	backend.force_gl_in_out_block = true;
638 	backend.supports_extensions = true;
639 	backend.use_array_constructor = true;
640 
641 	backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320);
642 
643 	if (is_legacy_es())
644 		backend.support_case_fallthrough = false;
645 
646 	// Scan the SPIR-V to find trivial uses of extensions.
647 	fixup_type_alias();
648 	reorder_type_alias();
649 	build_function_control_flow_graphs_and_analyze();
650 	find_static_extensions();
651 	fixup_image_load_store_access();
652 	update_active_builtins();
653 	analyze_image_and_sampler_usage();
654 	analyze_interlocked_resource_usage();
655 	if (!inout_color_attachments.empty())
656 		emit_inout_fragment_outputs_copy_to_subpass_inputs();
657 
658 	// Shaders might cast unrelated data to pointers of non-block types.
659 	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
660 	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
661 		analyze_non_block_pointer_types();
662 
663 	uint32_t pass_count = 0;
664 	do
665 	{
666 		if (pass_count >= 3)
667 			SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");
668 
669 		reset();
670 
671 		buffer.reset();
672 
673 		emit_header();
674 		emit_resources();
675 		emit_extension_workarounds(get_execution_model());
676 
677 		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
678 
679 		pass_count++;
680 	} while (is_forcing_recompilation());
681 
682 	// Implement the interlocked wrapper function at the end.
683 	// The body was implemented in lieu of main().
684 	if (interlocked_is_complex)
685 	{
686 		statement("void main()");
687 		begin_scope();
688 		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
689 		if (options.es)
690 			statement("beginInvocationInterlockNV();");
691 		else
692 			statement("beginInvocationInterlockARB();");
693 		statement("spvMainInterlockedBody();");
694 		if (options.es)
695 			statement("endInvocationInterlockNV();");
696 		else
697 			statement("endInvocationInterlockARB();");
698 		end_scope();
699 	}
700 
701 	// Entry point in GLSL is always main().
702 	get_entry_point().name = "main";
703 
704 	return buffer.str();
705 }
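// Typical caller-side usage (a sketch based on the public SPIRV-Cross API; option names may vary
// between versions):
//   spirv_cross::CompilerGLSL glsl(std::move(spirv_words)); // std::vector<uint32_t> of SPIR-V words
//   auto opts = glsl.get_common_options();
//   opts.es = true;
//   opts.version = 310;
//   glsl.set_common_options(opts);
//   std::string glsl_source = glsl.compile();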
706 
707 std::string CompilerGLSL::get_partial_source()
708 {
709 	return buffer.str();
710 }
711 
712 void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
713                                         const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
714 {
715 	auto &execution = get_entry_point();
716 
717 	if (wg_x.id)
718 	{
719 		if (options.vulkan_semantics)
720 			arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
721 		else
722 			arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
723 	}
724 	else
725 		arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));
726 
727 	if (wg_y.id)
728 	{
729 		if (options.vulkan_semantics)
730 			arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
731 		else
732 			arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
733 	}
734 	else
735 		arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));
736 
737 	if (wg_z.id)
738 	{
739 		if (options.vulkan_semantics)
740 			arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
741 		else
742 			arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
743 	}
744 	else
745 		arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
746 }
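// Example of the resulting declaration (illustrative): for a fixed 8x8x1 work group this contributes
// "local_size_x = 8, local_size_y = 8, local_size_z = 1", which emit_header() then wraps as
// "layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;".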
747 
748 void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
749 {
750 	if (options.vulkan_semantics)
751 	{
752 		auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
753 		require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
754 	}
755 	else
756 	{
757 		if (!shader_subgroup_supporter.is_feature_requested(feature))
758 			force_recompile();
759 		shader_subgroup_supporter.request_feature(feature);
760 	}
761 }
762 
763 void CompilerGLSL::emit_header()
764 {
765 	auto &execution = get_entry_point();
766 	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");
767 
768 	if (!options.es && options.version < 420)
769 	{
770 		// Needed for binding = # on UBOs, etc.
771 		if (options.enable_420pack_extension)
772 		{
773 			statement("#ifdef GL_ARB_shading_language_420pack");
774 			statement("#extension GL_ARB_shading_language_420pack : require");
775 			statement("#endif");
776 		}
777 		// Needed for: layout(early_fragment_tests) in;
778 		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
779 			require_extension_internal("GL_ARB_shader_image_load_store");
780 	}
781 
782 	// Needed for: layout(post_depth_coverage) in;
783 	if (execution.flags.get(ExecutionModePostDepthCoverage))
784 		require_extension_internal("GL_ARB_post_depth_coverage");
785 
786 	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
787 	if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
788 	    execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
789 	    execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
790 	    execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
791 	{
792 		if (options.es)
793 		{
794 			if (options.version < 310)
795 				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
796 			require_extension_internal("GL_NV_fragment_shader_interlock");
797 		}
798 		else
799 		{
800 			if (options.version < 420)
801 				require_extension_internal("GL_ARB_shader_image_load_store");
802 			require_extension_internal("GL_ARB_fragment_shader_interlock");
803 		}
804 	}
805 
806 	for (auto &ext : forced_extensions)
807 	{
808 		if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
809 		{
810 			// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
811 			// GL_AMD_gpu_shader_half_float is a superset, so try that first.
812 			statement("#if defined(GL_AMD_gpu_shader_half_float)");
813 			statement("#extension GL_AMD_gpu_shader_half_float : require");
814 			if (!options.vulkan_semantics)
815 			{
816 				statement("#elif defined(GL_NV_gpu_shader5)");
817 				statement("#extension GL_NV_gpu_shader5 : require");
818 			}
819 			else
820 			{
821 				statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
822 				statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
823 			}
824 			statement("#else");
825 			statement("#error No extension available for FP16.");
826 			statement("#endif");
827 		}
828 		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
829 		{
830 			if (options.vulkan_semantics)
831 				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
832 			else
833 			{
834 				statement("#if defined(GL_AMD_gpu_shader_int16)");
835 				statement("#extension GL_AMD_gpu_shader_int16 : require");
836 				statement("#elif defined(GL_NV_gpu_shader5)");
837 				statement("#extension GL_NV_gpu_shader5 : require");
838 				statement("#else");
839 				statement("#error No extension available for Int16.");
840 				statement("#endif");
841 			}
842 		}
843 		else if (ext == "GL_ARB_post_depth_coverage")
844 		{
845 			if (options.es)
846 				statement("#extension GL_EXT_post_depth_coverage : require");
847 			else
848 			{
849 				statement("#if defined(GL_ARB_post_depth_coverage)");
850 				statement("#extension GL_ARB_post_depth_coverage : require");
851 				statement("#else");
852 				statement("#extension GL_EXT_post_depth_coverage : require");
853 				statement("#endif");
854 			}
855 		}
856 		else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
857 		{
858 			// Soft-enable this extension on plain GLSL.
859 			statement("#ifdef ", ext);
860 			statement("#extension ", ext, " : enable");
861 			statement("#endif");
862 		}
863 		else if (ext == "GL_EXT_control_flow_attributes")
864 		{
865 			// These are just hints so we can conditionally enable and fall back in the shader.
866 			statement("#if defined(GL_EXT_control_flow_attributes)");
867 			statement("#extension GL_EXT_control_flow_attributes : require");
868 			statement("#define SPIRV_CROSS_FLATTEN [[flatten]]");
869 			statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
870 			statement("#define SPIRV_CROSS_UNROLL [[unroll]]");
871 			statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]");
872 			statement("#else");
873 			statement("#define SPIRV_CROSS_FLATTEN");
874 			statement("#define SPIRV_CROSS_BRANCH");
875 			statement("#define SPIRV_CROSS_UNROLL");
876 			statement("#define SPIRV_CROSS_LOOP");
877 			statement("#endif");
878 		}
879 		else
880 			statement("#extension ", ext, " : require");
881 	}
882 
883 	if (!options.vulkan_semantics)
884 	{
885 		using Supp = ShaderSubgroupSupportHelper;
886 		auto result = shader_subgroup_supporter.resolve();
887 
888 		for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
889 		{
890 			auto feature = static_cast<Supp::Feature>(feature_index);
891 			if (!shader_subgroup_supporter.is_feature_requested(feature))
892 				continue;
893 
894 			auto exts = Supp::get_candidates_for_feature(feature, result);
895 			if (exts.empty())
896 				continue;
897 
898 			statement("");
899 
900 			for (auto &ext : exts)
901 			{
902 				const char *name = Supp::get_extension_name(ext);
903 				const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
904 				auto extra_names = Supp::get_extra_required_extension_names(ext);
905 				statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
906 				          (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
907 				for (const auto &e : extra_names)
908 					statement("#extension ", e, " : enable");
909 				statement("#extension ", name, " : require");
910 			}
911 
912 			if (!Supp::can_feature_be_implemented_without_extensions(feature))
913 			{
914 				statement("#else");
915 				statement("#error No extensions available to emulate requested subgroup feature.");
916 			}
917 
918 			statement("#endif");
919 		}
920 	}
921 
922 	for (auto &header : header_lines)
923 		statement(header);
924 
925 	SmallVector<string> inputs;
926 	SmallVector<string> outputs;
927 
928 	switch (execution.model)
929 	{
930 	case ExecutionModelVertex:
931 		if (options.ovr_multiview_view_count)
932 			inputs.push_back(join("num_views = ", options.ovr_multiview_view_count));
933 		break;
934 	case ExecutionModelGeometry:
935 		if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
936 			inputs.push_back(join("invocations = ", execution.invocations));
937 		if (execution.flags.get(ExecutionModeInputPoints))
938 			inputs.push_back("points");
939 		if (execution.flags.get(ExecutionModeInputLines))
940 			inputs.push_back("lines");
941 		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
942 			inputs.push_back("lines_adjacency");
943 		if (execution.flags.get(ExecutionModeTriangles))
944 			inputs.push_back("triangles");
945 		if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
946 			inputs.push_back("triangles_adjacency");
947 
948 		if (!execution.geometry_passthrough)
949 		{
950 			// For passthrough, these are implied and cannot be declared in the shader.
951 			outputs.push_back(join("max_vertices = ", execution.output_vertices));
952 			if (execution.flags.get(ExecutionModeOutputTriangleStrip))
953 				outputs.push_back("triangle_strip");
954 			if (execution.flags.get(ExecutionModeOutputPoints))
955 				outputs.push_back("points");
956 			if (execution.flags.get(ExecutionModeOutputLineStrip))
957 				outputs.push_back("line_strip");
958 		}
959 		break;
960 
961 	case ExecutionModelTessellationControl:
962 		if (execution.flags.get(ExecutionModeOutputVertices))
963 			outputs.push_back(join("vertices = ", execution.output_vertices));
964 		break;
965 
966 	case ExecutionModelTessellationEvaluation:
967 		if (execution.flags.get(ExecutionModeQuads))
968 			inputs.push_back("quads");
969 		if (execution.flags.get(ExecutionModeTriangles))
970 			inputs.push_back("triangles");
971 		if (execution.flags.get(ExecutionModeIsolines))
972 			inputs.push_back("isolines");
973 		if (execution.flags.get(ExecutionModePointMode))
974 			inputs.push_back("point_mode");
975 
976 		if (!execution.flags.get(ExecutionModeIsolines))
977 		{
978 			if (execution.flags.get(ExecutionModeVertexOrderCw))
979 				inputs.push_back("cw");
980 			if (execution.flags.get(ExecutionModeVertexOrderCcw))
981 				inputs.push_back("ccw");
982 		}
983 
984 		if (execution.flags.get(ExecutionModeSpacingFractionalEven))
985 			inputs.push_back("fractional_even_spacing");
986 		if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
987 			inputs.push_back("fractional_odd_spacing");
988 		if (execution.flags.get(ExecutionModeSpacingEqual))
989 			inputs.push_back("equal_spacing");
990 		break;
991 
992 	case ExecutionModelGLCompute:
993 	{
994 		if (execution.workgroup_size.constant != 0)
995 		{
996 			SpecializationConstant wg_x, wg_y, wg_z;
997 			get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
998 
999 			// If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
1000 			// declarations before we can emit the work group size.
1001 			if (options.vulkan_semantics ||
1002 			    ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
1003 				build_workgroup_size(inputs, wg_x, wg_y, wg_z);
1004 		}
1005 		else
1006 		{
1007 			inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
1008 			inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
1009 			inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
1010 		}
1011 		break;
1012 	}
1013 
1014 	case ExecutionModelFragment:
1015 		if (options.es)
1016 		{
1017 			switch (options.fragment.default_float_precision)
1018 			{
1019 			case Options::Lowp:
1020 				statement("precision lowp float;");
1021 				break;
1022 
1023 			case Options::Mediump:
1024 				statement("precision mediump float;");
1025 				break;
1026 
1027 			case Options::Highp:
1028 				statement("precision highp float;");
1029 				break;
1030 
1031 			default:
1032 				break;
1033 			}
1034 
1035 			switch (options.fragment.default_int_precision)
1036 			{
1037 			case Options::Lowp:
1038 				statement("precision lowp int;");
1039 				break;
1040 
1041 			case Options::Mediump:
1042 				statement("precision mediump int;");
1043 				break;
1044 
1045 			case Options::Highp:
1046 				statement("precision highp int;");
1047 				break;
1048 
1049 			default:
1050 				break;
1051 			}
1052 		}
1053 
1054 		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
1055 			inputs.push_back("early_fragment_tests");
1056 		if (execution.flags.get(ExecutionModePostDepthCoverage))
1057 			inputs.push_back("post_depth_coverage");
1058 
1059 		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
1060 			inputs.push_back("pixel_interlock_ordered");
1061 		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
1062 			inputs.push_back("pixel_interlock_unordered");
1063 		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
1064 			inputs.push_back("sample_interlock_ordered");
1065 		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
1066 			inputs.push_back("sample_interlock_unordered");
1067 
1068 		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
1069 			statement("layout(depth_greater) out float gl_FragDepth;");
1070 		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
1071 			statement("layout(depth_less) out float gl_FragDepth;");
1072 
1073 		break;
1074 
1075 	default:
1076 		break;
1077 	}
1078 
1079 	for (auto &cap : ir.declared_capabilities)
1080 		if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
1081 			statement("layout(primitive_culling);");
1082 
1083 	if (!inputs.empty())
1084 		statement("layout(", merge(inputs), ") in;");
1085 	if (!outputs.empty())
1086 		statement("layout(", merge(outputs), ") out;");
1087 
1088 	statement("");
1089 }
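// Illustrative output (assuming an ESSL 3.10 fragment shader with default precisions): the header
// emitted above begins with
//   #version 310 es
//   precision mediump float;
//   precision highp int;
// followed by any layout(...) in/out qualifiers collected in `inputs` and `outputs`.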
1090 
1091 bool CompilerGLSL::type_is_empty(const SPIRType &type)
1092 {
1093 	return type.basetype == SPIRType::Struct && type.member_types.empty();
1094 }
1095 
1096 void CompilerGLSL::emit_struct(SPIRType &type)
1097 {
1098 	// Struct types can be stamped out multiple times
1099 	// with just different offsets, matrix layouts, etc ...
1100 	// Type-punning with these types is legal, which complicates things
1101 	// when we are storing struct and array types in an SSBO for example.
1102 	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
1103 	if (type.type_alias != TypeID(0) &&
1104 	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
1105 		return;
1106 
1107 	add_resource_name(type.self);
1108 	auto name = type_to_glsl(type);
1109 
1110 	statement(!backend.explicit_struct_type ? "struct " : "", name);
1111 	begin_scope();
1112 
1113 	type.member_name_cache.clear();
1114 
1115 	uint32_t i = 0;
1116 	bool emitted = false;
1117 	for (auto &member : type.member_types)
1118 	{
1119 		add_member_name(type, i);
1120 		emit_struct_member(type, member, i);
1121 		i++;
1122 		emitted = true;
1123 	}
1124 
1125 	// Don't declare empty structs in GLSL, this is not allowed.
1126 	if (type_is_empty(type) && !backend.supports_empty_struct)
1127 	{
1128 		statement("int empty_struct_member;");
1129 		emitted = true;
1130 	}
1131 
1132 	if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
1133 		emit_struct_padding_target(type);
1134 
1135 	end_scope_decl();
1136 
1137 	if (emitted)
1138 		statement("");
1139 }
1140 
1141 string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
1142 {
1143 	string res;
1144 	//if (flags & (1ull << DecorationSmooth))
1145 	//    res += "smooth ";
1146 	if (flags.get(DecorationFlat))
1147 		res += "flat ";
1148 	if (flags.get(DecorationNoPerspective))
1149 		res += "noperspective ";
1150 	if (flags.get(DecorationCentroid))
1151 		res += "centroid ";
1152 	if (flags.get(DecorationPatch))
1153 		res += "patch ";
1154 	if (flags.get(DecorationSample))
1155 		res += "sample ";
1156 	if (flags.get(DecorationInvariant))
1157 		res += "invariant ";
1158 
1159 	if (flags.get(DecorationExplicitInterpAMD))
1160 	{
1161 		require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
1162 		res += "__explicitInterpAMD ";
1163 	}
1164 
1165 	if (flags.get(DecorationPerVertexNV))
1166 	{
1167 		if (options.es && options.version < 320)
1168 			SPIRV_CROSS_THROW("pervertexNV requires ESSL 320.");
1169 		else if (!options.es && options.version < 450)
1170 			SPIRV_CROSS_THROW("pervertexNV requires GLSL 450.");
1171 		require_extension_internal("GL_NV_fragment_shader_barycentric");
1172 		res += "pervertexNV ";
1173 	}
1174 
1175 	return res;
1176 }
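// Example (illustrative): a Bitset with DecorationFlat and DecorationCentroid set produces
// "flat centroid ", which the caller prepends to the variable declaration.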
1177 
1178 string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
1179 {
1180 	if (is_legacy())
1181 		return "";
1182 
1183 	bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1184 	if (!is_block)
1185 		return "";
1186 
1187 	auto &memb = ir.meta[type.self].members;
1188 	if (index >= memb.size())
1189 		return "";
1190 	auto &dec = memb[index];
1191 
1192 	SmallVector<string> attr;
1193 
1194 	if (has_member_decoration(type.self, index, DecorationPassthroughNV))
1195 		attr.push_back("passthrough");
1196 
1197 	// We can only apply layouts on members in block interfaces.
1198 	// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
1199 	// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
1200 	// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
1201 	//
1202 	// We would like to go from (SPIR-V style):
1203 	//
1204 	// struct Foo { layout(row_major) mat4 matrix; };
1205 	// buffer UBO { Foo foo; };
1206 	//
1207 	// to
1208 	//
1209 	// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
1210 	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
1211 	auto flags = combined_decoration_for_member(type, index);
1212 
1213 	if (flags.get(DecorationRowMajor))
1214 		attr.push_back("row_major");
1215 	// We don't emit any global layouts, so column_major is default.
1216 	//if (flags & (1ull << DecorationColMajor))
1217 	//    attr.push_back("column_major");
1218 
1219 	if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
1220 		attr.push_back(join("location = ", dec.location));
1221 
1222 	// Can only declare component if we can declare location.
1223 	if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
1224 	{
1225 		if (!options.es)
1226 		{
1227 			if (options.version < 440 && options.version >= 140)
1228 				require_extension_internal("GL_ARB_enhanced_layouts");
1229 			else if (options.version < 140)
1230 				SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
1231 			attr.push_back(join("component = ", dec.component));
1232 		}
1233 		else
1234 			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
1235 	}
1236 
1237 	// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
1238 	// This is only done selectively in GLSL as needed.
1239 	if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
1240 	    dec.decoration_flags.get(DecorationOffset))
1241 		attr.push_back(join("offset = ", dec.offset));
1242 	else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
1243 		attr.push_back(join("xfb_offset = ", dec.offset));
1244 
1245 	if (attr.empty())
1246 		return "";
1247 
1248 	string res = "layout(";
1249 	res += merge(attr);
1250 	res += ") ";
1251 	return res;
1252 }
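// Example (illustrative): a block member decorated with RowMajor and Location = 2 yields
// "layout(row_major, location = 2) " here, assuming the target supports I/O locations on block members.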
1253 
1254 const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
1255 {
1256 	if (options.es && is_desktop_only_format(format))
1257 		SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
1258 
1259 	switch (format)
1260 	{
1261 	case ImageFormatRgba32f:
1262 		return "rgba32f";
1263 	case ImageFormatRgba16f:
1264 		return "rgba16f";
1265 	case ImageFormatR32f:
1266 		return "r32f";
1267 	case ImageFormatRgba8:
1268 		return "rgba8";
1269 	case ImageFormatRgba8Snorm:
1270 		return "rgba8_snorm";
1271 	case ImageFormatRg32f:
1272 		return "rg32f";
1273 	case ImageFormatRg16f:
1274 		return "rg16f";
1275 	case ImageFormatRgba32i:
1276 		return "rgba32i";
1277 	case ImageFormatRgba16i:
1278 		return "rgba16i";
1279 	case ImageFormatR32i:
1280 		return "r32i";
1281 	case ImageFormatRgba8i:
1282 		return "rgba8i";
1283 	case ImageFormatRg32i:
1284 		return "rg32i";
1285 	case ImageFormatRg16i:
1286 		return "rg16i";
1287 	case ImageFormatRgba32ui:
1288 		return "rgba32ui";
1289 	case ImageFormatRgba16ui:
1290 		return "rgba16ui";
1291 	case ImageFormatR32ui:
1292 		return "r32ui";
1293 	case ImageFormatRgba8ui:
1294 		return "rgba8ui";
1295 	case ImageFormatRg32ui:
1296 		return "rg32ui";
1297 	case ImageFormatRg16ui:
1298 		return "rg16ui";
1299 	case ImageFormatR11fG11fB10f:
1300 		return "r11f_g11f_b10f";
1301 	case ImageFormatR16f:
1302 		return "r16f";
1303 	case ImageFormatRgb10A2:
1304 		return "rgb10_a2";
1305 	case ImageFormatR8:
1306 		return "r8";
1307 	case ImageFormatRg8:
1308 		return "rg8";
1309 	case ImageFormatR16:
1310 		return "r16";
1311 	case ImageFormatRg16:
1312 		return "rg16";
1313 	case ImageFormatRgba16:
1314 		return "rgba16";
1315 	case ImageFormatR16Snorm:
1316 		return "r16_snorm";
1317 	case ImageFormatRg16Snorm:
1318 		return "rg16_snorm";
1319 	case ImageFormatRgba16Snorm:
1320 		return "rgba16_snorm";
1321 	case ImageFormatR8Snorm:
1322 		return "r8_snorm";
1323 	case ImageFormatRg8Snorm:
1324 		return "rg8_snorm";
1325 	case ImageFormatR8ui:
1326 		return "r8ui";
1327 	case ImageFormatRg8ui:
1328 		return "rg8ui";
1329 	case ImageFormatR16ui:
1330 		return "r16ui";
1331 	case ImageFormatRgb10a2ui:
1332 		return "rgb10_a2ui";
1333 	case ImageFormatR8i:
1334 		return "r8i";
1335 	case ImageFormatRg8i:
1336 		return "rg8i";
1337 	case ImageFormatR16i:
1338 		return "r16i";
1339 	default:
1340 	case ImageFormatUnknown:
1341 		return nullptr;
1342 	}
1343 }
1344 
1345 uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
1346 {
1347 	switch (type.basetype)
1348 	{
1349 	case SPIRType::Double:
1350 	case SPIRType::Int64:
1351 	case SPIRType::UInt64:
1352 		return 8;
1353 	case SPIRType::Float:
1354 	case SPIRType::Int:
1355 	case SPIRType::UInt:
1356 		return 4;
1357 	case SPIRType::Half:
1358 	case SPIRType::Short:
1359 	case SPIRType::UShort:
1360 		return 2;
1361 	case SPIRType::SByte:
1362 	case SPIRType::UByte:
1363 		return 1;
1364 
1365 	default:
1366 		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
1367 	}
1368 }
1369 
1370 uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
1371                                                 BufferPackingStandard packing)
1372 {
1373 	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
1374 	// and is 64-bit.
1375 	if (type.storage == StorageClassPhysicalStorageBufferEXT)
1376 	{
1377 		if (!type.pointer)
1378 			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1379 
1380 		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1381 		{
1382 			if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
1383 				return 16;
1384 			else
1385 				return 8;
1386 		}
1387 		else
1388 			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1389 	}
1390 
1391 	if (!type.array.empty())
1392 	{
1393 		uint32_t minimum_alignment = 1;
1394 		if (packing_is_vec4_padded(packing))
1395 			minimum_alignment = 16;
1396 
1397 		auto *tmp = &get<SPIRType>(type.parent_type);
1398 		while (!tmp->array.empty())
1399 			tmp = &get<SPIRType>(tmp->parent_type);
1400 
1401 		// Get the alignment of the base type, then maybe round up.
1402 		return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
1403 	}
1404 
1405 	if (type.basetype == SPIRType::Struct)
1406 	{
1407 		// Rule 9. Struct alignment is the maximum alignment of its members.
1408 		uint32_t alignment = 1;
1409 		for (uint32_t i = 0; i < type.member_types.size(); i++)
1410 		{
1411 			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1412 			alignment =
1413 			    max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
1414 		}
1415 
1416 		// In std140, struct alignment is rounded up to 16.
1417 		if (packing_is_vec4_padded(packing))
1418 			alignment = max(alignment, 16u);
1419 
1420 		return alignment;
1421 	}
1422 	else
1423 	{
1424 		const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1425 
1426 		// Alignment requirement for scalar block layout is always the alignment for the most basic component.
1427 		if (packing_is_scalar(packing))
1428 			return base_alignment;
1429 
1430 		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
1431 		// a vec4; this is handled outside, since that part knows our current offset.
1432 		if (type.columns == 1 && packing_is_hlsl(packing))
1433 			return base_alignment;
1434 
1435 		// From 7.6.2.2 in GL 4.5 core spec.
1436 		// Rule 1
1437 		if (type.vecsize == 1 && type.columns == 1)
1438 			return base_alignment;
1439 
1440 		// Rule 2
1441 		if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
1442 			return type.vecsize * base_alignment;
1443 
1444 		// Rule 3
1445 		if (type.vecsize == 3 && type.columns == 1)
1446 			return 4 * base_alignment;
1447 
1448 		// Rule 4 implied. Alignment does not change in std430.
1449 
1450 		// Rule 5. Column-major matrices are stored as arrays of
1451 		// vectors.
1452 		if (flags.get(DecorationColMajor) && type.columns > 1)
1453 		{
1454 			if (packing_is_vec4_padded(packing))
1455 				return 4 * base_alignment;
1456 			else if (type.vecsize == 3)
1457 				return 4 * base_alignment;
1458 			else
1459 				return type.vecsize * base_alignment;
1460 		}
1461 
1462 		// Rule 6 implied.
1463 
1464 		// Rule 7.
1465 		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1466 		{
1467 			if (packing_is_vec4_padded(packing))
1468 				return 4 * base_alignment;
1469 			else if (type.columns == 3)
1470 				return 4 * base_alignment;
1471 			else
1472 				return type.columns * base_alignment;
1473 		}
1474 
1475 		// Rule 8 implied.
1476 	}
1477 
1478 	SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
1479 }
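// Worked example (illustrative): a vec3 member (vecsize == 3, columns == 1, 32-bit float) hits
// Rule 3 and aligns to 4 * 4 = 16 bytes under std140/std430, but only to 4 bytes under scalar
// packing, where the alignment of the basic component type is used directly.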
1480 
1481 uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
1482                                                    BufferPackingStandard packing)
1483 {
1484 	// Array stride is equal to aligned size of the underlying type.
1485 	uint32_t parent = type.parent_type;
1486 	assert(parent);
1487 
1488 	auto &tmp = get<SPIRType>(parent);
1489 
1490 	uint32_t size = type_to_packed_size(tmp, flags, packing);
1491 	uint32_t alignment = type_to_packed_alignment(type, flags, packing);
1492 	return (size + alignment - 1) & ~(alignment - 1);
1493 }
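// Worked example (illustrative): for "float a[4]" the element size is 4 bytes; under std140 the
// array alignment is rounded up to 16, giving a stride of 16, while std430 keeps the stride at 4.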
1494 
1495 uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
1496 {
1497 	if (!type.array.empty())
1498 	{
1499 		uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
1500 
1501 		// For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
1502 		// so that it is possible to pack other vectors into the last element.
1503 		if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
1504 			packed_size -= (4 - type.vecsize) * (type.width / 8);
1505 
1506 		return packed_size;
1507 	}
1508 
1509 	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
1510 	// and is 64-bit.
1511 	if (type.storage == StorageClassPhysicalStorageBufferEXT)
1512 	{
1513 		if (!type.pointer)
1514 			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1515 
1516 		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1517 			return 8;
1518 		else
1519 			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1520 	}
1521 
1522 	uint32_t size = 0;
1523 
1524 	if (type.basetype == SPIRType::Struct)
1525 	{
1526 		uint32_t pad_alignment = 1;
1527 
1528 		for (uint32_t i = 0; i < type.member_types.size(); i++)
1529 		{
1530 			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1531 			auto &member_type = get<SPIRType>(type.member_types[i]);
1532 
1533 			uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
1534 			uint32_t alignment = max(packed_alignment, pad_alignment);
1535 
1536 			// The next member following a struct member is aligned to the base alignment of the struct that came before.
1537 			// GL 4.5 spec, 7.6.2.2.
1538 			if (member_type.basetype == SPIRType::Struct)
1539 				pad_alignment = packed_alignment;
1540 			else
1541 				pad_alignment = 1;
1542 
1543 			size = (size + alignment - 1) & ~(alignment - 1);
1544 			size += type_to_packed_size(member_type, member_flags, packing);
1545 		}
1546 	}
1547 	else
1548 	{
1549 		const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1550 
1551 		if (packing_is_scalar(packing))
1552 		{
1553 			size = type.vecsize * type.columns * base_alignment;
1554 		}
1555 		else
1556 		{
1557 			if (type.columns == 1)
1558 				size = type.vecsize * base_alignment;
1559 
1560 			if (flags.get(DecorationColMajor) && type.columns > 1)
1561 			{
1562 				if (packing_is_vec4_padded(packing))
1563 					size = type.columns * 4 * base_alignment;
1564 				else if (type.vecsize == 3)
1565 					size = type.columns * 4 * base_alignment;
1566 				else
1567 					size = type.columns * type.vecsize * base_alignment;
1568 			}
1569 
1570 			if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1571 			{
1572 				if (packing_is_vec4_padded(packing))
1573 					size = type.vecsize * 4 * base_alignment;
1574 				else if (type.columns == 3)
1575 					size = type.vecsize * 4 * base_alignment;
1576 				else
1577 					size = type.vecsize * type.columns * base_alignment;
1578 			}
1579 
1580 			// For matrices in HLSL, the last element has a size which depends on its vector size,
1581 			// so that it is possible to pack other vectors into the last element.
1582 			if (packing_is_hlsl(packing) && type.columns > 1)
1583 				size -= (4 - type.vecsize) * (type.width / 8);
1584 		}
1585 	}
1586 
1587 	return size;
1588 }
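// Worked example (illustrative): a column-major mat3 of 32-bit floats (columns == 3, vecsize == 3)
// is sized as 3 columns * 4 * 4 = 48 bytes under std140/std430, since each column is padded to a
// vec4, whereas scalar packing gives 3 * 3 * 4 = 36 bytes.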
1589 
1590 bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
1591                                               uint32_t *failed_validation_index, uint32_t start_offset,
1592                                               uint32_t end_offset)
1593 {
1594 	// This is very tricky and error prone, but try to be exhaustive and correct here.
1595 	// SPIR-V doesn't directly say if we're using std430 or std140.
1596 	// SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
1597 	// so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
1598 	// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
1599 	//
1600 	// It is almost certain that we're using std430, but it gets tricky with arrays in particular.
1601 	// We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
1602 	//
1603 	// The only two differences between std140 and std430 are related to padding alignment/array stride
1604 	// in arrays and structs. In std140 they take minimum vec4 alignment.
1605 	// std430 only removes the vec4 requirement.
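	// A rough illustration (hypothetical buffer, not taken from any particular shader):
	//     layout(std140) buffer A { float arr[8]; }; // ArrayStride = 16, arr occupies 128 bytes.
	//     layout(std430) buffer B { float arr[8]; }; // ArrayStride = 4,  arr occupies 32 bytes.
	// Observing ArrayStride = 4 on such a member is enough to rule out std140.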
1606 
1607 	uint32_t offset = 0;
1608 	uint32_t pad_alignment = 1;
1609 
1610 	bool is_top_level_block =
1611 	    has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1612 
1613 	for (uint32_t i = 0; i < type.member_types.size(); i++)
1614 	{
1615 		auto &memb_type = get<SPIRType>(type.member_types[i]);
1616 		auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1617 
1618 		// Verify alignment rules.
1619 		uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
1620 
1621 		// This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
1622 		// layout(constant_id = 0) const int s = 10;
1623 		// const int S = s + 5; // SpecConstantOp
1624 		// buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
1625 		// we would need full implementation of compile-time constant folding. :(
1626 		// If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
1627 		// for our analysis (e.g. unsized arrays).
1628 		// This lets us simply ignore that there are spec constant op sized arrays in our buffers.
1629 		// Querying size of this member will fail, so just don't call it unless we have to.
1630 		//
1631 		// This is likely the best effort we can make without going into unacceptably complicated workarounds.
1632 		bool member_can_be_unsized =
1633 		    is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
1634 
1635 		uint32_t packed_size = 0;
1636 		if (!member_can_be_unsized || packing_is_hlsl(packing))
1637 			packed_size = type_to_packed_size(memb_type, member_flags, packing);
1638 
1639 		// We only need to care about this if we have non-array types which can straddle the vec4 boundary.
1640 		if (packing_is_hlsl(packing))
1641 		{
1642 			// If a member straddles across a vec4 boundary, alignment is actually vec4.
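			// As a hypothetical example: a vec2 at offset 12 would occupy bytes 12..19 and cross the
			// 16-byte register boundary, so HLSL packing bumps it to the next register at offset 16.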
1643 			uint32_t begin_word = offset / 16;
1644 			uint32_t end_word = (offset + packed_size - 1) / 16;
1645 			if (begin_word != end_word)
1646 				packed_alignment = max(packed_alignment, 16u);
1647 		}
1648 
1649 		uint32_t actual_offset = type_struct_member_offset(type, i);
1650 		// Field is not in the specified range anymore and we can ignore any further fields.
1651 		if (actual_offset >= end_offset)
1652 			break;
1653 
1654 		uint32_t alignment = max(packed_alignment, pad_alignment);
1655 		offset = (offset + alignment - 1) & ~(alignment - 1);
1656 
1657 		// The next member following a struct member is aligned to the base alignment of the struct that came before.
1658 		// GL 4.5 spec, 7.6.2.2.
1659 		if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
1660 			pad_alignment = packed_alignment;
1661 		else
1662 			pad_alignment = 1;
1663 
1664 		// Only care about packing if we are in the given range
1665 		if (actual_offset >= start_offset)
1666 		{
1667 			// We only care about offsets in std140, std430, etc ...
1668 			// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
1669 			if (!packing_has_flexible_offset(packing))
1670 			{
1671 				if (actual_offset != offset) // This cannot be the packing we're looking for.
1672 				{
1673 					if (failed_validation_index)
1674 						*failed_validation_index = i;
1675 					return false;
1676 				}
1677 			}
1678 			else if ((actual_offset & (alignment - 1)) != 0)
1679 			{
1680 				// We still need to verify that alignment rules are observed, even if we have explicit offset.
1681 				if (failed_validation_index)
1682 					*failed_validation_index = i;
1683 				return false;
1684 			}
1685 
1686 			// Verify array stride rules.
1687 			if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
1688 			                                    type_struct_member_array_stride(type, i))
1689 			{
1690 				if (failed_validation_index)
1691 					*failed_validation_index = i;
1692 				return false;
1693 			}
1694 
1695 			// Verify that sub-structs also follow packing rules.
1696 			// We cannot use enhanced layouts on substructs, so they better be up to spec.
1697 			auto substruct_packing = packing_to_substruct_packing(packing);
1698 
1699 			if (!memb_type.pointer && !memb_type.member_types.empty() &&
1700 			    !buffer_is_packing_standard(memb_type, substruct_packing))
1701 			{
1702 				if (failed_validation_index)
1703 					*failed_validation_index = i;
1704 				return false;
1705 			}
1706 		}
1707 
1708 		// Bump size.
1709 		offset = actual_offset + packed_size;
1710 	}
1711 
1712 	return true;
1713 }
1714 
1715 bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
1716 {
1717 	// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
1718 	// Be very explicit here about how to solve the issue.
1719 	if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1720 	    (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1721 	{
1722 		uint32_t minimum_desktop_version = block ? 440 : 410;
1723 		// ARB_enhanced_layouts vs ARB_separate_shader_objects ...
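		// I.e. blocks are assumed to need the GLSL 4.40 feature set (GL_ARB_enhanced_layouts) to carry
		// locations on stage-to-stage interfaces, while plain variables only need GLSL 4.10
		// (GL_ARB_separate_shader_objects) or the separate_shader_objects option.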
1724 
1725 		if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1726 			return false;
1727 		else if (options.es && options.version < 310)
1728 			return false;
1729 	}
1730 
1731 	if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1732 	    (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1733 	{
1734 		if (options.es && options.version < 300)
1735 			return false;
1736 		else if (!options.es && options.version < 330)
1737 			return false;
1738 	}
1739 
1740 	if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1741 	{
1742 		if (options.es && options.version < 310)
1743 			return false;
1744 		else if (!options.es && options.version < 430)
1745 			return false;
1746 	}
1747 
1748 	return true;
1749 }
1750 
1751 string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
1752 {
1753 	// FIXME: Come up with a better solution for when to disable layouts.
1754 	// Whether layouts can be used at all depends on extensions as well as which
1755 	// kinds of layouts are requested. For now, the simple solution is to just disable
1756 	// layouts for legacy versions.
1757 	if (is_legacy())
1758 		return "";
1759 
1760 	if (subpass_input_is_framebuffer_fetch(var.self))
1761 		return "";
1762 
1763 	SmallVector<string> attr;
1764 
1765 	auto &type = get<SPIRType>(var.basetype);
1766 	auto &flags = get_decoration_bitset(var.self);
1767 	auto &typeflags = get_decoration_bitset(type.self);
1768 
1769 	if (flags.get(DecorationPassthroughNV))
1770 		attr.push_back("passthrough");
1771 
1772 	if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
1773 		attr.push_back("push_constant");
1774 	else if (var.storage == StorageClassShaderRecordBufferKHR)
1775 		attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
1776 
1777 	if (flags.get(DecorationRowMajor))
1778 		attr.push_back("row_major");
1779 	if (flags.get(DecorationColMajor))
1780 		attr.push_back("column_major");
1781 
1782 	if (options.vulkan_semantics)
1783 	{
1784 		if (flags.get(DecorationInputAttachmentIndex))
1785 			attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
1786 	}
1787 
1788 	bool is_block = has_decoration(type.self, DecorationBlock);
1789 	if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
1790 	{
1791 		Bitset combined_decoration;
1792 		for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
1793 			combined_decoration.merge_or(combined_decoration_for_member(type, i));
1794 
1795 		// If our members have location decorations, we don't need to
1796 		// emit location decorations at the top as well (looks weird).
1797 		if (!combined_decoration.get(DecorationLocation))
1798 			attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
1799 	}
1800 
1801 	if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput &&
1802 	    location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation)))
1803 	{
1804 		attr.push_back("noncoherent");
1805 	}
1806 
1807 	// Transform feedback
1808 	bool uses_enhanced_layouts = false;
1809 	if (is_block && var.storage == StorageClassOutput)
1810 	{
1811 		// For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
1812 		// since all members must match the same xfb_buffer. The only thing we will declare for members of the block
1813 		// is the xfb_offset.
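		// The emitted block then looks roughly like this (names and offsets illustrative):
		//     layout(xfb_buffer = 0, xfb_stride = 32) out VertOut
		//     {
		//         layout(xfb_offset = 0) vec4 position;
		//         layout(xfb_offset = 16) vec4 color;
		//     } vout;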
1814 		uint32_t member_count = uint32_t(type.member_types.size());
1815 		bool have_xfb_buffer_stride = false;
1816 		bool have_any_xfb_offset = false;
1817 		bool have_geom_stream = false;
1818 		uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
1819 
1820 		if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
1821 		{
1822 			have_xfb_buffer_stride = true;
1823 			xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
1824 			xfb_stride = get_decoration(var.self, DecorationXfbStride);
1825 		}
1826 
1827 		if (flags.get(DecorationStream))
1828 		{
1829 			have_geom_stream = true;
1830 			geom_stream = get_decoration(var.self, DecorationStream);
1831 		}
1832 
1833 		// Verify that none of the members violate our assumption.
1834 		for (uint32_t i = 0; i < member_count; i++)
1835 		{
1836 			if (has_member_decoration(type.self, i, DecorationStream))
1837 			{
1838 				uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
1839 				if (have_geom_stream && member_geom_stream != geom_stream)
1840 					SPIRV_CROSS_THROW("IO block member Stream mismatch.");
1841 				have_geom_stream = true;
1842 				geom_stream = member_geom_stream;
1843 			}
1844 
1845 			// Only members with an Offset decoration participate in XFB.
1846 			if (!has_member_decoration(type.self, i, DecorationOffset))
1847 				continue;
1848 			have_any_xfb_offset = true;
1849 
1850 			if (has_member_decoration(type.self, i, DecorationXfbBuffer))
1851 			{
1852 				uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
1853 				if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
1854 					SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
1855 				have_xfb_buffer_stride = true;
1856 				xfb_buffer = buffer_index;
1857 			}
1858 
1859 			if (has_member_decoration(type.self, i, DecorationXfbStride))
1860 			{
1861 				uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
1862 				if (have_xfb_buffer_stride && stride != xfb_stride)
1863 					SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
1864 				have_xfb_buffer_stride = true;
1865 				xfb_stride = stride;
1866 			}
1867 		}
1868 
1869 		if (have_xfb_buffer_stride && have_any_xfb_offset)
1870 		{
1871 			attr.push_back(join("xfb_buffer = ", xfb_buffer));
1872 			attr.push_back(join("xfb_stride = ", xfb_stride));
1873 			uses_enhanced_layouts = true;
1874 		}
1875 
1876 		if (have_geom_stream)
1877 		{
1878 			if (get_execution_model() != ExecutionModelGeometry)
1879 				SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1880 			if (options.es)
1881 				SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1882 			if (options.version < 400)
1883 				require_extension_internal("GL_ARB_transform_feedback3");
1884 			attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
1885 		}
1886 	}
1887 	else if (var.storage == StorageClassOutput)
1888 	{
1889 		if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
1890 		{
1891 			// XFB for standalone variables, we can emit all decorations.
1892 			attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
1893 			attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
1894 			attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
1895 			uses_enhanced_layouts = true;
1896 		}
1897 
1898 		if (flags.get(DecorationStream))
1899 		{
1900 			if (get_execution_model() != ExecutionModelGeometry)
1901 				SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1902 			if (options.es)
1903 				SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1904 			if (options.version < 400)
1905 				require_extension_internal("GL_ARB_transform_feedback3");
1906 			attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
1907 		}
1908 	}
1909 
1910 	// Can only declare Component if we can declare location.
1911 	if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
1912 	{
1913 		uses_enhanced_layouts = true;
1914 		attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
1915 	}
1916 
1917 	if (uses_enhanced_layouts)
1918 	{
1919 		if (!options.es)
1920 		{
1921 			if (options.version < 440 && options.version >= 140)
1922 				require_extension_internal("GL_ARB_enhanced_layouts");
1923 			else if (options.version < 140)
1924 				SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
1925 			if (!options.es && options.version < 440)
1926 				require_extension_internal("GL_ARB_enhanced_layouts");
1927 		}
1928 		else if (options.es)
1929 			SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
1930 	}
1931 
1932 	if (flags.get(DecorationIndex))
1933 		attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
1934 
1935 	// Do not emit set = decoration in regular GLSL output, but
1936 	// we need to preserve it in Vulkan GLSL mode.
1937 	if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
1938 	{
1939 		if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
1940 			attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
1941 	}
1942 
1943 	bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
1944 	bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
1945 	                  (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
1946 	bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
1947 	bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
1948 
1949 	// GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
1950 	bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
1951 
1952 	// Pretend there are no UBOs when the options say so.
1953 	if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
1954 		can_use_buffer_blocks = false;
1955 
1956 	bool can_use_binding;
1957 	if (options.es)
1958 		can_use_binding = options.version >= 310;
1959 	else
1960 		can_use_binding = options.enable_420pack_extension || (options.version >= 420);
1961 
1962 	// Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
1963 	if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
1964 		can_use_binding = false;
1965 
1966 	if (var.storage == StorageClassShaderRecordBufferKHR)
1967 		can_use_binding = false;
1968 
1969 	if (can_use_binding && flags.get(DecorationBinding))
1970 		attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));
1971 
1972 	if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
1973 		attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
1974 
1975 	// Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
1976 	// If SPIR-V does not comply with either layout, we cannot really work around it.
1977 	if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
1978 	{
1979 		attr.push_back(buffer_to_packing_standard(type, false));
1980 	}
1981 	else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
1982 	{
1983 		attr.push_back(buffer_to_packing_standard(type, true));
1984 	}
1985 
1986 	// For images, the type itself adds a layout qualifier.
1987 	// Only emit the format for storage images.
1988 	if (type.basetype == SPIRType::Image && type.image.sampled == 2)
1989 	{
1990 		const char *fmt = format_to_glsl(type.image.format);
1991 		if (fmt)
1992 			attr.push_back(fmt);
1993 	}
1994 
1995 	if (attr.empty())
1996 		return "";
1997 
1998 	string res = "layout(";
1999 	res += merge(attr);
2000 	res += ") ";
2001 	return res;
2002 }
2003 
2004 string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
2005 {
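	// Try the most common layouts first: plain std430 (when allowed), then std140, then scalar,
	// and only then the *EnhancedLayout variants, which additionally require explicit
	// layout(offset = N) qualifiers on the members.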
2006 	if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
2007 		return "std430";
2008 	else if (buffer_is_packing_standard(type, BufferPackingStd140))
2009 		return "std140";
2010 	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
2011 	{
2012 		require_extension_internal("GL_EXT_scalar_block_layout");
2013 		return "scalar";
2014 	}
2015 	else if (support_std430_without_scalar_layout &&
2016 	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
2017 	{
2018 		if (options.es && !options.vulkan_semantics)
2019 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
2020 			                  "not support GL_ARB_enhanced_layouts.");
2021 		if (!options.es && !options.vulkan_semantics && options.version < 440)
2022 			require_extension_internal("GL_ARB_enhanced_layouts");
2023 
2024 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2025 		return "std430";
2026 	}
2027 	else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
2028 	{
2029 		// Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference,
2030 		// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
2031 		// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
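		// Conceptually (member names are illustrative), this turns into something like:
		//     layout(std140) uniform UBO { vec4 a; layout(offset = 32) vec4 b; };
		// where the explicit offset papers over the hole SPIR-V requested.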
2032 		if (options.es && !options.vulkan_semantics)
2033 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
2034 			                  "not support GL_ARB_enhanced_layouts.");
2035 		if (!options.es && !options.vulkan_semantics && options.version < 440)
2036 			require_extension_internal("GL_ARB_enhanced_layouts");
2037 
2038 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2039 		return "std140";
2040 	}
2041 	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
2042 	{
2043 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2044 		require_extension_internal("GL_EXT_scalar_block_layout");
2045 		return "scalar";
2046 	}
2047 	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2048 	         buffer_is_packing_standard(type, BufferPackingStd430))
2049 	{
2050 		// UBOs can support std430 with GL_EXT_scalar_block_layout.
2051 		require_extension_internal("GL_EXT_scalar_block_layout");
2052 		return "std430";
2053 	}
2054 	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2055 	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
2056 	{
2057 		// UBOs can support std430 with GL_EXT_scalar_block_layout.
2058 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2059 		require_extension_internal("GL_EXT_scalar_block_layout");
2060 		return "std430";
2061 	}
2062 	else
2063 	{
2064 		SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140 or scalar, even with enhanced "
2065 		                  "layouts. You can try flattening this block to support a more flexible layout.");
2066 	}
2067 }
2068 
2069 void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
2070 {
2071 	if (flattened_buffer_blocks.count(var.self))
2072 		emit_buffer_block_flattened(var);
2073 	else if (options.vulkan_semantics)
2074 		emit_push_constant_block_vulkan(var);
2075 	else if (options.emit_push_constant_as_uniform_buffer)
2076 		emit_buffer_block_native(var);
2077 	else
2078 		emit_push_constant_block_glsl(var);
2079 }
2080 
2081 void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
2082 {
2083 	emit_buffer_block(var);
2084 }
2085 
2086 void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
2087 {
2088 	// OpenGL has no concept of push constant blocks, so implement it as a uniform struct.
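	// Roughly speaking (names illustrative), a Vulkan-style push constant block such as
	//     layout(push_constant) uniform Push { mat4 mvp; } registers;
	// ends up being emitted as
	//     struct Push { mat4 mvp; };
	//     uniform Push registers;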
2089 	auto &type = get<SPIRType>(var.basetype);
2090 
2091 	auto &flags = ir.meta[var.self].decoration.decoration_flags;
2092 	flags.clear(DecorationBinding);
2093 	flags.clear(DecorationDescriptorSet);
2094 
2095 #if 0
2096     if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
2097         SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
2098                             "Remap to location with reflection API first or disable these decorations.");
2099 #endif
2100 
2101 	// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
2102 	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2103 	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2104 	bool block_flag = block_flags.get(DecorationBlock);
2105 	block_flags.clear(DecorationBlock);
2106 
2107 	emit_struct(type);
2108 
2109 	if (block_flag)
2110 		block_flags.set(DecorationBlock);
2111 
2112 	emit_uniform(var);
2113 	statement("");
2114 }
2115 
2116 void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
2117 {
2118 	auto &type = get<SPIRType>(var.basetype);
2119 	bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
2120 
2121 	if (flattened_buffer_blocks.count(var.self))
2122 		emit_buffer_block_flattened(var);
2123 	else if (is_legacy() || (!options.es && options.version == 130) ||
2124 	         (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
2125 		emit_buffer_block_legacy(var);
2126 	else
2127 		emit_buffer_block_native(var);
2128 }
2129 
2130 void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
2131 {
2132 	auto &type = get<SPIRType>(var.basetype);
2133 	bool ssbo = var.storage == StorageClassStorageBuffer ||
2134 	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2135 	if (ssbo)
2136 		SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
2137 
2138 	// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
2139 	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2140 	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2141 	bool block_flag = block_flags.get(DecorationBlock);
2142 	block_flags.clear(DecorationBlock);
2143 	emit_struct(type);
2144 	if (block_flag)
2145 		block_flags.set(DecorationBlock);
2146 	emit_uniform(var);
2147 	statement("");
2148 }
2149 
2150 void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration)
2151 {
2152 	string buffer_name;
2153 
2154 	if (forward_declaration)
2155 	{
2156 		// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2157 		// Allow an aliased name since we might be declaring the block twice: once as a buffer reference (forward declared) and once as the proper declaration.
2158 		// The names must match up.
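		// A hypothetical forward declaration + definition pair emitted by this function:
		//     layout(buffer_reference) buffer Node;
		//     layout(buffer_reference, std430) buffer Node { Node next; int value; };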
2159 		buffer_name = to_name(type.self, false);
2160 
2161 		// Shaders never use the block by interface name, so we don't
2162 		// have to track this other than updating name caches.
2163 		// If we have a collision for any reason, just fall back immediately.
2164 		if (ir.meta[type.self].decoration.alias.empty() ||
2165 		    block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
2166 		    resource_names.find(buffer_name) != end(resource_names))
2167 		{
2168 			buffer_name = join("_", type.self);
2169 		}
2170 
2171 		// Make sure we get something unique for both global name scope and block name scope.
2172 		// See GLSL 4.5 spec: section 4.3.9 for details.
2173 		add_variable(block_ssbo_names, resource_names, buffer_name);
2174 
2175 		// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2176 		// This cannot conflict with anything else, so we're safe now.
2177 		// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2178 		if (buffer_name.empty())
2179 			buffer_name = join("_", type.self);
2180 
2181 		block_names.insert(buffer_name);
2182 		block_ssbo_names.insert(buffer_name);
2183 
2184 		// Ensure we emit the correct name when emitting non-forward pointer type.
2185 		ir.meta[type.self].decoration.alias = buffer_name;
2186 	}
2187 	else if (type.basetype != SPIRType::Struct)
2188 		buffer_name = type_to_glsl(type);
2189 	else
2190 		buffer_name = to_name(type.self, false);
2191 
2192 	if (!forward_declaration)
2193 	{
2194 		if (type.basetype == SPIRType::Struct)
2195 		{
2196 			auto flags = ir.get_buffer_block_type_flags(type);
2197 			string decorations;
2198 			if (flags.get(DecorationRestrict))
2199 				decorations += " restrict";
2200 			if (flags.get(DecorationCoherent))
2201 				decorations += " coherent";
2202 			if (flags.get(DecorationNonReadable))
2203 				decorations += " writeonly";
2204 			if (flags.get(DecorationNonWritable))
2205 				decorations += " readonly";
2206 			statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true),
2207 			          ")", decorations, " buffer ", buffer_name);
2208 		}
2209 		else
2210 			statement("layout(buffer_reference) buffer ", buffer_name);
2211 
2212 		begin_scope();
2213 
2214 		if (type.basetype == SPIRType::Struct)
2215 		{
2216 			type.member_name_cache.clear();
2217 
2218 			uint32_t i = 0;
2219 			for (auto &member : type.member_types)
2220 			{
2221 				add_member_name(type, i);
2222 				emit_struct_member(type, member, i);
2223 				i++;
2224 			}
2225 		}
2226 		else
2227 		{
2228 			auto &pointee_type = get_pointee_type(type);
2229 			statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
2230 		}
2231 
2232 		end_scope_decl();
2233 		statement("");
2234 	}
2235 	else
2236 	{
2237 		statement("layout(buffer_reference) buffer ", buffer_name, ";");
2238 	}
2239 }
2240 
2241 void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
2242 {
2243 	auto &type = get<SPIRType>(var.basetype);
2244 
2245 	Bitset flags = ir.get_buffer_block_flags(var);
2246 	bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2247 	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2248 	bool is_restrict = ssbo && flags.get(DecorationRestrict);
2249 	bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
2250 	bool is_readonly = ssbo && flags.get(DecorationNonWritable);
2251 	bool is_coherent = ssbo && flags.get(DecorationCoherent);
2252 
2253 	// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2254 	auto buffer_name = to_name(type.self, false);
2255 
2256 	auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
2257 
2258 	// Shaders never use the block by interface name, so we don't
2259 	// have to track this other than updating name caches.
2260 	// If we have a collision for any reason, just fall back immediately.
2261 	if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
2262 	    resource_names.find(buffer_name) != end(resource_names))
2263 	{
2264 		buffer_name = get_block_fallback_name(var.self);
2265 	}
2266 
2267 	// Make sure we get something unique for both global name scope and block name scope.
2268 	// See GLSL 4.5 spec: section 4.3.9 for details.
2269 	add_variable(block_namespace, resource_names, buffer_name);
2270 
2271 	// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2272 	// This cannot conflict with anything else, so we're safe now.
2273 	// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2274 	if (buffer_name.empty())
2275 		buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2276 
2277 	block_names.insert(buffer_name);
2278 	block_namespace.insert(buffer_name);
2279 
2280 	// Save for post-reflection later.
2281 	declared_block_names[var.self] = buffer_name;
2282 
2283 	statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
2284 	          is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
2285 	          buffer_name);
2286 
2287 	begin_scope();
2288 
2289 	type.member_name_cache.clear();
2290 
2291 	uint32_t i = 0;
2292 	for (auto &member : type.member_types)
2293 	{
2294 		add_member_name(type, i);
2295 		emit_struct_member(type, member, i);
2296 		i++;
2297 	}
2298 
2299 	// var.self can be used as a backup name for the block name,
2300 	// so we need to make sure we don't disturb the name here on a recompile.
2301 	// It will need to be reset if we have to recompile.
2302 	preserve_alias_on_reset(var.self);
2303 	add_resource_name(var.self);
2304 	end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
2305 	statement("");
2306 }
2307 
2308 void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
2309 {
2310 	auto &type = get<SPIRType>(var.basetype);
2311 
2312 	// Block names should never alias.
2313 	auto buffer_name = to_name(type.self, false);
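	// The block is flattened into an array of 16-byte (vec4-sized) elements, so round the declared
	// byte size up to whole slots; e.g. a 72-byte block would hypothetically emit "uniform vec4 Name[5];".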
2314 	size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
2315 
2316 	SPIRType::BaseType basic_type;
2317 	if (get_common_basic_type(type, basic_type))
2318 	{
2319 		SPIRType tmp;
2320 		tmp.basetype = basic_type;
2321 		tmp.vecsize = 4;
2322 		if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
2323 			SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
2324 
2325 		auto flags = ir.get_buffer_block_flags(var);
2326 		statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
2327 		          buffer_size, "];");
2328 	}
2329 	else
2330 		SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
2331 }
2332 
2333 const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
2334 {
2335 	auto &execution = get_entry_point();
2336 
2337 	if (subpass_input_is_framebuffer_fetch(var.self))
2338 		return "";
2339 
2340 	if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
2341 	{
2342 		if (is_legacy() && execution.model == ExecutionModelVertex)
2343 			return var.storage == StorageClassInput ? "attribute " : "varying ";
2344 		else if (is_legacy() && execution.model == ExecutionModelFragment)
2345 			return "varying "; // Fragment outputs are renamed so they never hit this case.
2346 		else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
2347 		{
2348 			uint32_t loc = get_decoration(var.self, DecorationLocation);
2349 			bool is_inout = location_is_framebuffer_fetch(loc);
2350 			if (is_inout)
2351 				return "inout ";
2352 			else
2353 				return "out ";
2354 		}
2355 		else
2356 			return var.storage == StorageClassInput ? "in " : "out ";
2357 	}
2358 	else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
2359 	         var.storage == StorageClassPushConstant)
2360 	{
2361 		return "uniform ";
2362 	}
2363 	else if (var.storage == StorageClassRayPayloadKHR)
2364 	{
2365 		return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
2366 	}
2367 	else if (var.storage == StorageClassIncomingRayPayloadKHR)
2368 	{
2369 		return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
2370 	}
2371 	else if (var.storage == StorageClassHitAttributeKHR)
2372 	{
2373 		return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
2374 	}
2375 	else if (var.storage == StorageClassCallableDataKHR)
2376 	{
2377 		return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
2378 	}
2379 	else if (var.storage == StorageClassIncomingCallableDataKHR)
2380 	{
2381 		return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
2382 	}
2383 
2384 	return "";
2385 }
2386 
2387 void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
2388                                                   const SmallVector<uint32_t> &indices)
2389 {
2390 	uint32_t member_type_id = type.self;
2391 	const SPIRType *member_type = &type;
2392 	const SPIRType *parent_type = nullptr;
2393 	auto flattened_name = basename;
2394 	for (auto &index : indices)
2395 	{
2396 		flattened_name += "_";
2397 		flattened_name += to_member_name(*member_type, index);
2398 		parent_type = member_type;
2399 		member_type_id = member_type->member_types[index];
2400 		member_type = &get<SPIRType>(member_type_id);
2401 	}
2402 
2403 	assert(member_type->basetype != SPIRType::Struct);
2404 
2405 	// We're overriding struct member names, so ensure we do so on the primary type.
2406 	if (parent_type->type_alias)
2407 		parent_type = &get<SPIRType>(parent_type->type_alias);
2408 
2409 	// Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
2410 	// which is not allowed.
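	// For example (hypothetical names), a block instance "vout" with nested member path "light.color"
	// would be flattened to a varying named "vout_light_color".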
2411 	ParsedIR::sanitize_underscores(flattened_name);
2412 
2413 	uint32_t last_index = indices.back();
2414 
2415 	// Pass in the varying qualifier here so it will appear in the correct declaration order.
2416 	// Replace member name while emitting it so it encodes both struct name and member name.
2417 	auto backup_name = get_member_name(parent_type->self, last_index);
2418 	auto member_name = to_member_name(*parent_type, last_index);
2419 	set_member_name(parent_type->self, last_index, flattened_name);
2420 	emit_struct_member(*parent_type, member_type_id, last_index, qual);
2421 	// Restore member name.
2422 	set_member_name(parent_type->self, last_index, member_name);
2423 }
2424 
2425 void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
2426                                                   const SmallVector<uint32_t> &indices)
2427 {
2428 	auto sub_indices = indices;
2429 	sub_indices.push_back(0);
2430 
2431 	const SPIRType *member_type = &type;
2432 	for (auto &index : indices)
2433 		member_type = &get<SPIRType>(member_type->member_types[index]);
2434 
2435 	assert(member_type->basetype == SPIRType::Struct);
2436 
2437 	if (!member_type->array.empty())
2438 		SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
2439 
2440 	for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
2441 	{
2442 		sub_indices.back() = i;
2443 		if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
2444 			emit_flattened_io_block_struct(basename, type, qual, sub_indices);
2445 		else
2446 			emit_flattened_io_block_member(basename, type, qual, sub_indices);
2447 	}
2448 }
2449 
2450 void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
2451 {
2452 	auto &var_type = get<SPIRType>(var.basetype);
2453 	if (!var_type.array.empty())
2454 		SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
2455 
2456 	// Emit flattened types based on the type alias. Normally, we are never supposed to emit
2457 	// struct declarations for aliased types.
2458 	auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
2459 
2460 	auto old_flags = ir.meta[type.self].decoration.decoration_flags;
2461 	// Emit the members as if they are part of a block to get all qualifiers.
2462 	ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
2463 
2464 	type.member_name_cache.clear();
2465 
2466 	SmallVector<uint32_t> member_indices;
2467 	member_indices.push_back(0);
2468 	auto basename = to_name(var.self);
2469 
2470 	uint32_t i = 0;
2471 	for (auto &member : type.member_types)
2472 	{
2473 		add_member_name(type, i);
2474 		auto &membertype = get<SPIRType>(member);
2475 
2476 		member_indices.back() = i;
2477 		if (membertype.basetype == SPIRType::Struct)
2478 			emit_flattened_io_block_struct(basename, type, qual, member_indices);
2479 		else
2480 			emit_flattened_io_block_member(basename, type, qual, member_indices);
2481 		i++;
2482 	}
2483 
2484 	ir.meta[type.self].decoration.decoration_flags = old_flags;
2485 
2486 	// Treat this variable as fully flattened from now on.
2487 	flattened_structs[var.self] = true;
2488 }
2489 
2490 void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
2491 {
2492 	auto &type = get<SPIRType>(var.basetype);
2493 
2494 	if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
2495 	    !options.es && options.version < 410)
2496 	{
2497 		require_extension_internal("GL_ARB_vertex_attrib_64bit");
2498 	}
2499 
2500 	// Either make it plain in/out or in/out blocks depending on what shader is doing ...
2501 	bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
2502 	const char *qual = to_storage_qualifiers_glsl(var);
2503 
2504 	if (block)
2505 	{
2506 		// ESSL earlier than 310 and GLSL earlier than 150 did not support
2507 		// I/O variables which are struct types.
2508 		// To support this, flatten the struct into separate varyings instead.
2509 		if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2510 		    (!options.es && options.version < 150))
2511 		{
2512 			// I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
2513 			// On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
2514 			emit_flattened_io_block(var, qual);
2515 		}
2516 		else
2517 		{
2518 			if (options.es && options.version < 320)
2519 			{
2520 				// Geometry and tessellation extensions imply this extension.
2521 				if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
2522 					require_extension_internal("GL_EXT_shader_io_blocks");
2523 			}
2524 
2525 			// Workaround to make sure we can emit "patch in/out" correctly.
2526 			fixup_io_block_patch_qualifiers(var);
2527 
2528 			// Block names should never alias.
2529 			auto block_name = to_name(type.self, false);
2530 
2531 			// The namespace for I/O blocks is separate from other variables in GLSL.
2532 			auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
2533 
2534 			// Shaders never use the block by interface name, so we don't
2535 			// have to track this other than updating name caches.
2536 			if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
2537 				block_name = get_fallback_name(type.self);
2538 			else
2539 				block_namespace.insert(block_name);
2540 
2541 			// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2542 			// This cannot conflict with anything else, so we're safe now.
2543 			if (block_name.empty())
2544 				block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2545 
2546 			// Instance names cannot alias block names.
2547 			resource_names.insert(block_name);
2548 
2549 			bool is_patch = has_decoration(var.self, DecorationPatch);
2550 			statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name);
2551 			begin_scope();
2552 
2553 			type.member_name_cache.clear();
2554 
2555 			uint32_t i = 0;
2556 			for (auto &member : type.member_types)
2557 			{
2558 				add_member_name(type, i);
2559 				emit_struct_member(type, member, i);
2560 				i++;
2561 			}
2562 
2563 			add_resource_name(var.self);
2564 			end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
2565 			statement("");
2566 		}
2567 	}
2568 	else
2569 	{
2570 		// ESSL earlier than 310 and GLSL earlier than 150 did not support
2571 		// I/O variables which are struct types.
2572 		// To support this, flatten the struct into separate varyings instead.
2573 		if (type.basetype == SPIRType::Struct &&
2574 		    (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2575 		     (!options.es && options.version < 150)))
2576 		{
2577 			emit_flattened_io_block(var, qual);
2578 		}
2579 		else
2580 		{
2581 			add_resource_name(var.self);
2582 
2583 			// Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
2584 			// Opt for unsized as it's the more "correct" variant to use.
2585 			bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() &&
2586 			                                 !has_decoration(var.self, DecorationPatch) &&
2587 			                                 (get_entry_point().model == ExecutionModelTessellationControl ||
2588 			                                  get_entry_point().model == ExecutionModelTessellationEvaluation);
2589 
2590 			uint32_t old_array_size = 0;
2591 			bool old_array_size_literal = true;
2592 
2593 			if (control_point_input_array)
2594 			{
2595 				swap(type.array.back(), old_array_size);
2596 				swap(type.array_size_literal.back(), old_array_size_literal);
2597 			}
2598 
2599 			statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
2600 			          variable_decl(type, to_name(var.self), var.self), ";");
2601 
2602 			if (control_point_input_array)
2603 			{
2604 				swap(type.array.back(), old_array_size);
2605 				swap(type.array_size_literal.back(), old_array_size_literal);
2606 			}
2607 		}
2608 	}
2609 }
2610 
2611 void CompilerGLSL::emit_uniform(const SPIRVariable &var)
2612 {
2613 	auto &type = get<SPIRType>(var.basetype);
2614 	if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
2615 	{
2616 		if (!options.es && options.version < 420)
2617 			require_extension_internal("GL_ARB_shader_image_load_store");
2618 		else if (options.es && options.version < 310)
2619 			SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
2620 	}
2621 
2622 	add_resource_name(var.self);
2623 	statement(layout_for_variable(var), variable_decl(var), ";");
2624 }
2625 
2626 string CompilerGLSL::constant_value_macro_name(uint32_t id)
2627 {
2628 	return join("SPIRV_CROSS_CONSTANT_ID_", id);
2629 }
2630 
2631 void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
2632 {
2633 	auto &type = get<SPIRType>(constant.basetype);
2634 	auto name = to_name(constant.self);
2635 	statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
2636 }
2637 
2638 void CompilerGLSL::emit_constant(const SPIRConstant &constant)
2639 {
2640 	auto &type = get<SPIRType>(constant.constant_type);
2641 	auto name = to_name(constant.self);
2642 
2643 	SpecializationConstant wg_x, wg_y, wg_z;
2644 	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
2645 
2646 	// This specialization constant is implicitly declared by emitting layout() in;
2647 	if (constant.self == workgroup_size_id)
2648 		return;
2649 
2650 	// These specialization constants are implicitly declared by emitting layout() in;
2651 	// In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
2652 	// later can use macro overrides for work group size.
2653 	bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
2654 	                                  ConstantID(constant.self) == wg_z.id;
2655 
2656 	if (options.vulkan_semantics && is_workgroup_size_constant)
2657 	{
2658 		// Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
2659 		return;
2660 	}
2661 	else if (!options.vulkan_semantics && is_workgroup_size_constant &&
2662 	         !has_decoration(constant.self, DecorationSpecId))
2663 	{
2664 		// Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
2665 		return;
2666 	}
2667 
2668 	// Only scalars have constant IDs.
2669 	if (has_decoration(constant.self, DecorationSpecId))
2670 	{
2671 		if (options.vulkan_semantics)
2672 		{
2673 			statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
2674 			          variable_decl(type, name), " = ", constant_expression(constant), ";");
2675 		}
2676 		else
2677 		{
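			// Plain GLSL has no layout(constant_id), so emit an overridable macro instead.
			// The emitted text looks roughly like this (constant ID 10 and the value are illustrative):
			//     #ifndef SPIRV_CROSS_CONSTANT_ID_10
			//     #define SPIRV_CROSS_CONSTANT_ID_10 4
			//     #endif
			//     const int s = SPIRV_CROSS_CONSTANT_ID_10;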
2678 			const string &macro_name = constant.specialization_constant_macro_name;
2679 			statement("#ifndef ", macro_name);
2680 			statement("#define ", macro_name, " ", constant_expression(constant));
2681 			statement("#endif");
2682 
2683 			// For workgroup size constants, only emit the macros.
2684 			if (!is_workgroup_size_constant)
2685 				statement("const ", variable_decl(type, name), " = ", macro_name, ";");
2686 		}
2687 	}
2688 	else
2689 	{
2690 		statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
2691 	}
2692 }
2693 
2694 void CompilerGLSL::emit_entry_point_declarations()
2695 {
2696 }
2697 
2698 void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
2699 {
2700 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
2701 		if (is_hidden_variable(var))
2702 			return;
2703 
2704 		auto *meta = ir.find_meta(var.self);
2705 		if (!meta)
2706 			return;
2707 
2708 		auto &m = meta->decoration;
2709 		if (keywords.find(m.alias) != end(keywords))
2710 			m.alias = join("_", m.alias);
2711 	});
2712 
2713 	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
2714 		auto *meta = ir.find_meta(func.self);
2715 		if (!meta)
2716 			return;
2717 
2718 		auto &m = meta->decoration;
2719 		if (keywords.find(m.alias) != end(keywords))
2720 			m.alias = join("_", m.alias);
2721 	});
2722 
2723 	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
2724 		auto *meta = ir.find_meta(type.self);
2725 		if (!meta)
2726 			return;
2727 
2728 		auto &m = meta->decoration;
2729 		if (keywords.find(m.alias) != end(keywords))
2730 			m.alias = join("_", m.alias);
2731 
2732 		for (auto &memb : meta->members)
2733 			if (keywords.find(memb.alias) != end(keywords))
2734 				memb.alias = join("_", memb.alias);
2735 	});
2736 }
2737 
2738 void CompilerGLSL::replace_illegal_names()
2739 {
2740 	// clang-format off
2741 	static const unordered_set<string> keywords = {
2742 		"abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
2743 		"atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
2744 		"atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
2745 		"bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
2746 		"ceil", "cos", "cosh", "cross", "degrees",
2747 		"dFdx", "dFdxCoarse", "dFdxFine",
2748 		"dFdy", "dFdyCoarse", "dFdyFine",
2749 		"distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
2750 		"faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
2751 		"frexp", "fwidth", "fwidthCoarse", "fwidthFine",
2752 		"greaterThan", "greaterThanEqual", "groupMemoryBarrier",
2753 		"imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
2754 		"imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
2755 		"inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
2756 		"matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
2757 		"min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
2758 		"outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
2759 		"packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
2760 		"radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
2761 		"tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
2762 		"textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
2763 		"textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
2764 		"transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
2765 		"unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
2766 
2767 		"active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
2768 		"bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
2769 		"dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
2770 		"do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
2771 		"for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
2772 		"iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
2773 		"iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
2774 		"image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
2775 		"isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
2776 		"isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
2777 		"mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
2778 		"namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
2779 		"resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
2780 		"sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
2781 		"sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
2782 		"samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
2783 		"struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
2784 		"uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
2785 		"uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
2786 		"usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
2787 		"usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
2788 		"while", "writeonly",
2789 	};
2790 	// clang-format on
2791 
2792 	replace_illegal_names(keywords);
2793 }
2794 
2795 void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
2796 {
2797 	auto &m = ir.meta[var.self].decoration;
2798 	uint32_t location = 0;
2799 	if (m.decoration_flags.get(DecorationLocation))
2800 		location = m.location;
2801 
2802 	// If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
2803 	// do the access chain part of this for us.
2804 	auto &type = get<SPIRType>(var.basetype);
2805 
2806 	if (type.array.empty())
2807 	{
2808 		// Redirect the write to a specific render target in legacy GLSL.
2809 		m.alias = join("gl_FragData[", location, "]");
2810 
2811 		if (is_legacy_es() && location != 0)
2812 			require_extension_internal("GL_EXT_draw_buffers");
2813 	}
2814 	else if (type.array.size() == 1)
2815 	{
2816 		// If location is non-zero, we probably have to add an offset.
2817 		// This gets really tricky since we'd have to inject an offset in the access chain.
2818 		// FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
2819 		m.alias = "gl_FragData";
2820 		if (location != 0)
2821 			SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
2822 			                  "This is unimplemented in SPIRV-Cross.");
2823 
2824 		if (is_legacy_es())
2825 			require_extension_internal("GL_EXT_draw_buffers");
2826 	}
2827 	else
2828 		SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
2829 
2830 	var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
2831 }
2832 
2833 void CompilerGLSL::replace_fragment_outputs()
2834 {
2835 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2836 		auto &type = this->get<SPIRType>(var.basetype);
2837 
2838 		if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
2839 			replace_fragment_output(var);
2840 	});
2841 }
2842 
2843 string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
2844 {
2845 	if (out_type.vecsize == input_components)
2846 		return expr;
2847 	else if (input_components == 1 && !backend.can_swizzle_scalar)
2848 		return join(type_to_glsl(out_type), "(", expr, ")");
2849 	else
2850 	{
2851 		// FIXME: This will not work with packed expressions.
2852 		auto e = enclose_expression(expr) + ".";
2853 		// Just clamp the swizzle index if we have more outputs than inputs.
2854 		for (uint32_t c = 0; c < out_type.vecsize; c++)
2855 			e += index_to_swizzle(min(c, input_components - 1));
2856 		if (backend.swizzle_is_function && out_type.vecsize > 1)
2857 			e += "()";
2858 
2859 		remove_duplicate_swizzle(e);
2860 		return e;
2861 	}
2862 }
2863 
2864 void CompilerGLSL::emit_pls()
2865 {
2866 	auto &execution = get_entry_point();
2867 	if (execution.model != ExecutionModelFragment)
2868 		SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
2869 
2870 	if (!options.es)
2871 		SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
2872 
2873 	if (options.version < 300)
2874 		SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
2875 
2876 	if (!pls_inputs.empty())
2877 	{
2878 		statement("__pixel_local_inEXT _PLSIn");
2879 		begin_scope();
2880 		for (auto &input : pls_inputs)
2881 			statement(pls_decl(input), ";");
2882 		end_scope_decl();
2883 		statement("");
2884 	}
2885 
2886 	if (!pls_outputs.empty())
2887 	{
2888 		statement("__pixel_local_outEXT _PLSOut");
2889 		begin_scope();
2890 		for (auto &output : pls_outputs)
2891 			statement(pls_decl(output), ";");
2892 		end_scope_decl();
2893 		statement("");
2894 	}
2895 }
2896 
2897 void CompilerGLSL::fixup_image_load_store_access()
2898 {
2899 	if (!options.enable_storage_image_qualifier_deduction)
2900 		return;
2901 
2902 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
2903 		auto &vartype = expression_type(var);
2904 		if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
2905 		{
2906 			// Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
2907 			// Solve this by making the image access as restricted as possible and loosen up if we need to.
2908 			// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
2909 
2910 			auto &flags = ir.meta[var].decoration.decoration_flags;
2911 			if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
2912 			{
2913 				flags.set(DecorationNonWritable);
2914 				flags.set(DecorationNonReadable);
2915 			}
2916 		}
2917 	});
2918 }
2919 
2920 static bool is_block_builtin(BuiltIn builtin)
2921 {
2922 	return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
2923 	       builtin == BuiltInCullDistance;
2924 }
2925 
2926 bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
2927 {
2928 	// If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
2929 
2930 	if (storage != StorageClassOutput)
2931 		return false;
2932 	bool should_force = false;
2933 
2934 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2935 		if (should_force)
2936 			return;
2937 
2938 		auto &type = this->get<SPIRType>(var.basetype);
2939 		bool block = has_decoration(type.self, DecorationBlock);
2940 		if (var.storage == storage && block && is_builtin_variable(var))
2941 		{
2942 			uint32_t member_count = uint32_t(type.member_types.size());
2943 			for (uint32_t i = 0; i < member_count; i++)
2944 			{
2945 				if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
2946 				    is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
2947 				    has_member_decoration(type.self, i, DecorationOffset))
2948 				{
2949 					should_force = true;
2950 				}
2951 			}
2952 		}
2953 		else if (var.storage == storage && !block && is_builtin_variable(var))
2954 		{
2955 			if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
2956 			    has_decoration(var.self, DecorationOffset))
2957 			{
2958 				should_force = true;
2959 			}
2960 		}
2961 	});
2962 
2963 	// If we're declaring clip/cull planes with control points, we need to force block declaration.
2964 	if (get_execution_model() == ExecutionModelTessellationControl &&
2965 	    (clip_distance_count || cull_distance_count))
2966 	{
2967 		should_force = true;
2968 	}
2969 
2970 	return should_force;
2971 }
2972 
2973 void CompilerGLSL::fixup_implicit_builtin_block_names()
2974 {
2975 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2976 		auto &type = this->get<SPIRType>(var.basetype);
2977 		bool block = has_decoration(type.self, DecorationBlock);
2978 		if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
2979 		    is_builtin_variable(var))
2980 		{
2981 			// Make sure the array has a supported name in the code.
2982 			if (var.storage == StorageClassOutput)
2983 				set_name(var.self, "gl_out");
2984 			else if (var.storage == StorageClassInput)
2985 				set_name(var.self, "gl_in");
2986 		}
2987 	});
2988 }
2989 
2990 void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
2991 {
2992 	Bitset emitted_builtins;
2993 	Bitset global_builtins;
2994 	const SPIRVariable *block_var = nullptr;
2995 	bool emitted_block = false;
2996 	bool builtin_array = false;
2997 
2998 	// Need to use declared size in the type.
2999 	// These variables might have been declared, but not statically used, so we haven't deduced their size yet.
3000 	uint32_t cull_distance_size = 0;
3001 	uint32_t clip_distance_size = 0;
3002 
3003 	bool have_xfb_buffer_stride = false;
3004 	bool have_geom_stream = false;
3005 	bool have_any_xfb_offset = false;
3006 	uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
3007 	std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
3008 
3009 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3010 		auto &type = this->get<SPIRType>(var.basetype);
3011 		bool block = has_decoration(type.self, DecorationBlock);
3012 		Bitset builtins;
3013 
3014 		if (var.storage == storage && block && is_builtin_variable(var))
3015 		{
3016 			uint32_t index = 0;
3017 			for (auto &m : ir.meta[type.self].members)
3018 			{
3019 				if (m.builtin)
3020 				{
3021 					builtins.set(m.builtin_type);
3022 					if (m.builtin_type == BuiltInCullDistance)
3023 						cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
3024 					else if (m.builtin_type == BuiltInClipDistance)
3025 						clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
3026 
3027 					if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
3028 					{
3029 						have_any_xfb_offset = true;
3030 						builtin_xfb_offsets[m.builtin_type] = m.offset;
3031 					}
3032 
3033 					if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
3034 					{
3035 						uint32_t stream = m.stream;
3036 						if (have_geom_stream && geom_stream != stream)
3037 							SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3038 						have_geom_stream = true;
3039 						geom_stream = stream;
3040 					}
3041 				}
3042 				index++;
3043 			}
3044 
3045 			if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
3046 			    has_decoration(var.self, DecorationXfbStride))
3047 			{
3048 				uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
3049 				uint32_t stride = get_decoration(var.self, DecorationXfbStride);
3050 				if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3051 					SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3052 				if (have_xfb_buffer_stride && stride != xfb_stride)
3053 					SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3054 				have_xfb_buffer_stride = true;
3055 				xfb_buffer = buffer_index;
3056 				xfb_stride = stride;
3057 			}
3058 
3059 			if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
3060 			{
3061 				uint32_t stream = get_decoration(var.self, DecorationStream);
3062 				if (have_geom_stream && geom_stream != stream)
3063 					SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3064 				have_geom_stream = true;
3065 				geom_stream = stream;
3066 			}
3067 		}
3068 		else if (var.storage == storage && !block && is_builtin_variable(var))
3069 		{
3070 			// While we're at it, collect all declared global builtins (HLSL mostly ...).
3071 			auto &m = ir.meta[var.self].decoration;
3072 			if (m.builtin)
3073 			{
3074 				global_builtins.set(m.builtin_type);
3075 				if (m.builtin_type == BuiltInCullDistance)
3076 					cull_distance_size = to_array_size_literal(type);
3077 				else if (m.builtin_type == BuiltInClipDistance)
3078 					clip_distance_size = to_array_size_literal(type);
3079 
3080 				if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
3081 				    m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
3082 				{
3083 					have_any_xfb_offset = true;
3084 					builtin_xfb_offsets[m.builtin_type] = m.offset;
3085 					uint32_t buffer_index = m.xfb_buffer;
3086 					uint32_t stride = m.xfb_stride;
3087 					if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3088 						SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3089 					if (have_xfb_buffer_stride && stride != xfb_stride)
3090 						SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3091 					have_xfb_buffer_stride = true;
3092 					xfb_buffer = buffer_index;
3093 					xfb_stride = stride;
3094 				}
3095 
3096 				if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
3097 				{
3098 					uint32_t stream = get_decoration(var.self, DecorationStream);
3099 					if (have_geom_stream && geom_stream != stream)
3100 						SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3101 					have_geom_stream = true;
3102 					geom_stream = stream;
3103 				}
3104 			}
3105 		}
3106 
3107 		if (builtins.empty())
3108 			return;
3109 
3110 		if (emitted_block)
3111 			SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
3112 
3113 		emitted_builtins = builtins;
3114 		emitted_block = true;
3115 		builtin_array = !type.array.empty();
3116 		block_var = &var;
3117 	});
3118 
3119 	global_builtins =
3120 	    Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
3121 	                                          (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
3122 
3123 	// Try to collect all other declared builtins.
3124 	if (!emitted_block)
3125 		emitted_builtins = global_builtins;
3126 
3127 	// Can't declare an empty interface block.
3128 	if (emitted_builtins.empty())
3129 		return;
3130 
3131 	if (storage == StorageClassOutput)
3132 	{
3133 		SmallVector<string> attr;
3134 		if (have_xfb_buffer_stride && have_any_xfb_offset)
3135 		{
3136 			if (!options.es)
3137 			{
3138 				if (options.version < 440 && options.version >= 140)
3139 					require_extension_internal("GL_ARB_enhanced_layouts");
3140 				else if (options.version < 140)
3141 					SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
3142 				if (!options.es && options.version < 440)
3143 					require_extension_internal("GL_ARB_enhanced_layouts");
3144 			}
3145 			else if (options.es)
3146 				SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
3147 			attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
3148 		}
3149 
3150 		if (have_geom_stream)
3151 		{
3152 			if (get_execution_model() != ExecutionModelGeometry)
3153 				SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
3154 			if (options.es)
3155 				SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
3156 			if (options.version < 400)
3157 				require_extension_internal("GL_ARB_transform_feedback3");
3158 			attr.push_back(join("stream = ", geom_stream));
3159 		}
3160 
3161 		if (!attr.empty())
3162 			statement("layout(", merge(attr), ") out gl_PerVertex");
3163 		else
3164 			statement("out gl_PerVertex");
3165 	}
3166 	else
3167 	{
3168 		// If we have passthrough, there is no way PerVertex cannot be passthrough.
3169 		if (get_entry_point().geometry_passthrough)
3170 			statement("layout(passthrough) in gl_PerVertex");
3171 		else
3172 			statement("in gl_PerVertex");
3173 	}
3174 
3175 	begin_scope();
3176 	if (emitted_builtins.get(BuiltInPosition))
3177 	{
3178 		auto itr = builtin_xfb_offsets.find(BuiltInPosition);
3179 		if (itr != end(builtin_xfb_offsets))
3180 			statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
3181 		else
3182 			statement("vec4 gl_Position;");
3183 	}
3184 
3185 	if (emitted_builtins.get(BuiltInPointSize))
3186 	{
3187 		auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
3188 		if (itr != end(builtin_xfb_offsets))
3189 			statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
3190 		else
3191 			statement("float gl_PointSize;");
3192 	}
3193 
3194 	if (emitted_builtins.get(BuiltInClipDistance))
3195 	{
3196 		auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
3197 		if (itr != end(builtin_xfb_offsets))
3198 			statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
3199 		else
3200 			statement("float gl_ClipDistance[", clip_distance_size, "];");
3201 	}
3202 
3203 	if (emitted_builtins.get(BuiltInCullDistance))
3204 	{
3205 		auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
3206 		if (itr != end(builtin_xfb_offsets))
3207 			statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
3208 		else
3209 			statement("float gl_CullDistance[", cull_distance_size, "];");
3210 	}
3211 
3212 	if (builtin_array)
3213 	{
3214 		if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
3215 			end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
3216 		else
3217 			end_scope_decl(join(to_name(block_var->self), "[]"));
3218 	}
3219 	else
3220 		end_scope_decl();
3221 	statement("");
3222 }
3223 
3224 void CompilerGLSL::declare_undefined_values()
3225 {
3226 	bool emitted = false;
3227 	ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) {
3228 		auto &type = this->get<SPIRType>(undef.basetype);
3229 		// OpUndef can be void for some reason ...
3230 		if (type.basetype == SPIRType::Void)
3231 			return;
3232 
3233 		string initializer;
3234 		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
3235 			initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
3236 
3237 		statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
3238 		emitted = true;
3239 	});
3240 
3241 	if (emitted)
3242 		statement("");
3243 }
3244 
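// A variable is treated as a LUT (lookup table) when it has been statically assigned
// a constant that is marked as only ever being used as a lookup table.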
3245 bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
3246 {
3247 	bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
3248 
3249 	if (statically_assigned)
3250 	{
3251 		auto *constant = maybe_get<SPIRConstant>(var.static_expression);
3252 		if (constant && constant->is_used_as_lut)
3253 			return true;
3254 	}
3255 
3256 	return false;
3257 }
3258 
3259 void CompilerGLSL::emit_resources()
3260 {
3261 	auto &execution = get_entry_point();
3262 
3263 	replace_illegal_names();
3264 
3265 	// Legacy GL uses gl_FragData[], redeclare all fragment outputs
3266 	// with builtins.
3267 	if (execution.model == ExecutionModelFragment && is_legacy())
3268 		replace_fragment_outputs();
3269 
3270 	// Emit PLS blocks if we have such variables.
3271 	if (!pls_inputs.empty() || !pls_outputs.empty())
3272 		emit_pls();
3273 
3274 	switch (execution.model)
3275 	{
3276 	case ExecutionModelGeometry:
3277 	case ExecutionModelTessellationControl:
3278 	case ExecutionModelTessellationEvaluation:
3279 		fixup_implicit_builtin_block_names();
3280 		break;
3281 
3282 	default:
3283 		break;
3284 	}
3285 
3286 	// Emit custom gl_PerVertex for SSO compatibility.
3287 	if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
3288 	{
3289 		switch (execution.model)
3290 		{
3291 		case ExecutionModelGeometry:
3292 		case ExecutionModelTessellationControl:
3293 		case ExecutionModelTessellationEvaluation:
3294 			emit_declared_builtin_block(StorageClassInput, execution.model);
3295 			emit_declared_builtin_block(StorageClassOutput, execution.model);
3296 			break;
3297 
3298 		case ExecutionModelVertex:
3299 			emit_declared_builtin_block(StorageClassOutput, execution.model);
3300 			break;
3301 
3302 		default:
3303 			break;
3304 		}
3305 	}
3306 	else if (should_force_emit_builtin_block(StorageClassOutput))
3307 	{
3308 		emit_declared_builtin_block(StorageClassOutput, execution.model);
3309 	}
3310 	else if (execution.geometry_passthrough)
3311 	{
3312 		// Need to declare gl_in with Passthrough.
3313 		// If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
3314 		emit_declared_builtin_block(StorageClassInput, execution.model);
3315 	}
3316 	else
3317 	{
3318 		// Need to redeclare clip/cull distance with explicit size to use them.
3319 		// SPIR-V mandates these builtins have a size declared.
3320 		const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
3321 		if (clip_distance_count != 0)
3322 			statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
3323 		if (cull_distance_count != 0)
3324 			statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
3325 		if (clip_distance_count != 0 || cull_distance_count != 0)
3326 			statement("");
3327 	}
3328 
3329 	if (position_invariant)
3330 	{
3331 		statement("invariant gl_Position;");
3332 		statement("");
3333 	}
3334 
3335 	bool emitted = false;
3336 
3337 	// When emitting Vulkan GLSL,
3338 	// emit specialization constants as actual floats;
3339 	// spec op expressions will redirect to the constant name.
3340 	//
3341 	{
3342 		auto loop_lock = ir.create_loop_hard_lock();
3343 		for (auto &id_ : ir.ids_for_constant_or_type)
3344 		{
3345 			auto &id = ir.ids[id_];
3346 
3347 			if (id.get_type() == TypeConstant)
3348 			{
3349 				auto &c = id.get<SPIRConstant>();
3350 
3351 				bool needs_declaration = c.specialization || c.is_used_as_lut;
3352 
3353 				if (needs_declaration)
3354 				{
3355 					if (!options.vulkan_semantics && c.specialization)
3356 					{
3357 						c.specialization_constant_macro_name =
3358 						    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
3359 					}
3360 					emit_constant(c);
3361 					emitted = true;
3362 				}
3363 			}
3364 			else if (id.get_type() == TypeConstantOp)
3365 			{
3366 				emit_specialization_constant_op(id.get<SPIRConstantOp>());
3367 				emitted = true;
3368 			}
3369 			else if (id.get_type() == TypeType)
3370 			{
3371 				auto *type = &id.get<SPIRType>();
3372 
3373 				bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
3374 				                         (!has_decoration(type->self, DecorationBlock) &&
3375 				                          !has_decoration(type->self, DecorationBufferBlock));
3376 
3377 				// Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
3378 				if (type->basetype == SPIRType::Struct && type->pointer &&
3379 				    has_decoration(type->self, DecorationBlock) &&
3380 				    (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
3381 				     type->storage == StorageClassHitAttributeKHR))
3382 				{
3383 					type = &get<SPIRType>(type->parent_type);
3384 					is_natural_struct = true;
3385 				}
3386 
3387 				if (is_natural_struct)
3388 				{
3389 					if (emitted)
3390 						statement("");
3391 					emitted = false;
3392 
3393 					emit_struct(*type);
3394 				}
3395 			}
3396 		}
3397 	}
3398 
3399 	if (emitted)
3400 		statement("");
3401 
3402 	// If we needed to declare work group size late, check here.
3403 	// If the work group size depends on a specialization constant, we need to declare the layout() block
3404 	// after constants (and their macros) have been declared.
3405 	if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
3406 	    execution.workgroup_size.constant != 0)
3407 	{
3408 		SpecializationConstant wg_x, wg_y, wg_z;
3409 		get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
3410 
3411 		if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
3412 		{
3413 			SmallVector<string> inputs;
3414 			build_workgroup_size(inputs, wg_x, wg_y, wg_z);
3415 			statement("layout(", merge(inputs), ") in;");
3416 			statement("");
3417 		}
3418 	}
3419 
3420 	emitted = false;
3421 
3422 	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
3423 	{
3424 		for (auto type : physical_storage_non_block_pointer_types)
3425 		{
3426 			emit_buffer_reference_block(get<SPIRType>(type), false);
3427 		}
3428 
3429 		// Output buffer reference blocks.
3430 		// Do this in two stages, one with forward declaration,
3431 		// and one without. Buffer reference blocks can reference themselves
3432 		// to support things like linked lists.
3433 		ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
3434 			bool has_block_flags = has_decoration(type.self, DecorationBlock);
3435 			if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3436 			    type.storage == StorageClassPhysicalStorageBufferEXT)
3437 			{
3438 				emit_buffer_reference_block(type, true);
3439 			}
3440 		});
3441 
3442 		ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
3443 			bool has_block_flags = has_decoration(type.self, DecorationBlock);
3444 			if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3445 			    type.storage == StorageClassPhysicalStorageBufferEXT)
3446 			{
3447 				emit_buffer_reference_block(type, false);
3448 			}
3449 		});
3450 	}
3451 
3452 	// Output UBOs and SSBOs
3453 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3454 		auto &type = this->get<SPIRType>(var.basetype);
3455 
3456 		bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
3457 		                        type.storage == StorageClassShaderRecordBufferKHR;
3458 		bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
3459 		                       ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
3460 
3461 		if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
3462 		    has_block_flags)
3463 		{
3464 			emit_buffer_block(var);
3465 		}
3466 	});
3467 
3468 	// Output push constant blocks
3469 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3470 		auto &type = this->get<SPIRType>(var.basetype);
3471 		if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
3472 		    !is_hidden_variable(var))
3473 		{
3474 			emit_push_constant_block(var);
3475 		}
3476 	});
3477 
3478 	bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
3479 
3480 	// Output Uniform Constants (values, samplers, images, etc).
3481 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3482 		auto &type = this->get<SPIRType>(var.basetype);
3483 
3484 		// If we're remapping separate samplers and images, only emit the combined samplers.
3485 		if (skip_separate_image_sampler)
3486 		{
3487 			// Sampler buffers are always used without a sampler, and they will also work in regular GL.
3488 			bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
3489 			bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3490 			bool separate_sampler = type.basetype == SPIRType::Sampler;
3491 			if (!sampler_buffer && (separate_image || separate_sampler))
3492 				return;
3493 		}
3494 
3495 		if (var.storage != StorageClassFunction && type.pointer &&
3496 		    (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
3497 		     type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
3498 		     type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
3499 		     type.storage == StorageClassHitAttributeKHR) &&
3500 		    !is_hidden_variable(var))
3501 		{
3502 			emit_uniform(var);
3503 			emitted = true;
3504 		}
3505 	});
3506 
3507 	if (emitted)
3508 		statement("");
3509 	emitted = false;
3510 
3511 	bool emitted_base_instance = false;
3512 
3513 	// Output in/out interfaces.
3514 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3515 		auto &type = this->get<SPIRType>(var.basetype);
3516 
3517 		bool is_hidden = is_hidden_variable(var);
3518 
3519 		// Unused output I/O variables might still be required to implement framebuffer fetch.
3520 		if (var.storage == StorageClassOutput && !is_legacy() &&
3521 		    location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0)
3522 		{
3523 			is_hidden = false;
3524 		}
3525 
3526 		if (var.storage != StorageClassFunction && type.pointer &&
3527 		    (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
3528 		    interface_variable_exists_in_entry_point(var.self) && !is_hidden)
3529 		{
3530 			emit_interface_block(var);
3531 			emitted = true;
3532 		}
3533 		else if (is_builtin_variable(var))
3534 		{
3535 			auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
3536 			// For gl_InstanceIndex emulation on GLES, the API user needs to
3537 			// supply this uniform.
3538 
3539 			// The draw parameter extension is soft-enabled on GL with some fallbacks.
3540 			if (!options.vulkan_semantics)
3541 			{
3542 				if (!emitted_base_instance &&
3543 				    ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
3544 				     (builtin == BuiltInBaseInstance)))
3545 				{
3546 					statement("#ifdef GL_ARB_shader_draw_parameters");
3547 					statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
3548 					statement("#else");
3549 					// A crude, but simple workaround which should be good enough for non-indirect draws.
3550 					statement("uniform int SPIRV_Cross_BaseInstance;");
3551 					statement("#endif");
3552 					emitted = true;
3553 					emitted_base_instance = true;
3554 				}
3555 				else if (builtin == BuiltInBaseVertex)
3556 				{
3557 					statement("#ifdef GL_ARB_shader_draw_parameters");
3558 					statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
3559 					statement("#else");
3560 					// A crude, but simple workaround which should be good enough for non-indirect draws.
3561 					statement("uniform int SPIRV_Cross_BaseVertex;");
3562 					statement("#endif");
3563 				}
3564 				else if (builtin == BuiltInDrawIndex)
3565 				{
3566 					statement("#ifndef GL_ARB_shader_draw_parameters");
3567 					// Cannot really be worked around.
3568 					statement("#error GL_ARB_shader_draw_parameters is not supported.");
3569 					statement("#endif");
3570 				}
3571 			}
3572 		}
3573 	});
3574 
3575 	// Global variables.
3576 	for (auto global : global_variables)
3577 	{
3578 		auto &var = get<SPIRVariable>(global);
3579 		if (is_hidden_variable(var, true))
3580 			continue;
3581 
3582 		if (var.storage != StorageClassOutput)
3583 		{
3584 			if (!variable_is_lut(var))
3585 			{
3586 				add_resource_name(var.self);
3587 
3588 				string initializer;
3589 				if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
3590 				    !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
3591 				{
3592 					initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
3593 				}
3594 
3595 				statement(variable_decl(var), initializer, ";");
3596 				emitted = true;
3597 			}
3598 		}
3599 		else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
3600 		{
3601 			emit_output_variable_initializer(var);
3602 		}
3603 	}
3604 
3605 	if (emitted)
3606 		statement("");
3607 
3608 	declare_undefined_values();
3609 }
3610 
3611 void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
3612 {
3613 	// If a StorageClassOutput variable has an initializer, we need to initialize it in main().
3614 	auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
3615 	auto &type = get<SPIRType>(var.basetype);
3616 	bool is_patch = has_decoration(var.self, DecorationPatch);
3617 	bool is_block = has_decoration(type.self, DecorationBlock);
3618 	bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
3619 
3620 	if (is_block)
3621 	{
3622 		uint32_t member_count = uint32_t(type.member_types.size());
3623 		bool type_is_array = type.array.size() == 1;
3624 		uint32_t array_size = 1;
3625 		if (type_is_array)
3626 			array_size = to_array_size_literal(type);
3627 		uint32_t iteration_count = is_control_point ? 1 : array_size;
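		// For tessellation control points each invocation writes only its own element,
		// so a single iteration (indexed by gl_InvocationID) is enough.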
3628 
3629 		// If the initializer is a block, we must initialize each block member one at a time.
3630 		for (uint32_t i = 0; i < member_count; i++)
3631 		{
3632 			// These outputs might not have been properly declared, so don't initialize them in that case.
3633 			if (has_member_decoration(type.self, i, DecorationBuiltIn))
3634 			{
3635 				if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
3636 				    !cull_distance_count)
3637 					continue;
3638 
3639 				if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
3640 				    !clip_distance_count)
3641 					continue;
3642 			}
3643 
3644 			// We need to build a per-member array first, essentially transposing from AoS to SoA.
3645 			// This code path hits when we have an array of blocks.
3646 			string lut_name;
3647 			if (type_is_array)
3648 			{
3649 				lut_name = join("_", var.self, "_", i, "_init");
3650 				uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
3651 				auto &member_type = get<SPIRType>(member_type_id);
3652 				auto array_type = member_type;
3653 				array_type.parent_type = member_type_id;
3654 				array_type.array.push_back(array_size);
3655 				array_type.array_size_literal.push_back(true);
3656 
3657 				SmallVector<string> exprs;
3658 				exprs.reserve(array_size);
3659 				auto &c = get<SPIRConstant>(var.initializer);
3660 				for (uint32_t j = 0; j < array_size; j++)
3661 					exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
3662 				statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ",
3663 				          type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
3664 			}
3665 
3666 			for (uint32_t j = 0; j < iteration_count; j++)
3667 			{
3668 				entry_func.fixup_hooks_in.push_back([=, &var]() {
3669 					AccessChainMeta meta;
3670 					auto &c = this->get<SPIRConstant>(var.initializer);
3671 
3672 					uint32_t invocation_id = 0;
3673 					uint32_t member_index_id = 0;
3674 					if (is_control_point)
3675 					{
3676 						uint32_t ids = ir.increase_bound_by(3);
3677 						SPIRType uint_type;
3678 						uint_type.basetype = SPIRType::UInt;
3679 						uint_type.width = 32;
3680 						set<SPIRType>(ids, uint_type);
3681 						set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
3682 						set<SPIRConstant>(ids + 2, ids, i, false);
3683 						invocation_id = ids + 1;
3684 						member_index_id = ids + 2;
3685 					}
3686 
3687 					if (is_patch)
3688 					{
3689 						statement("if (gl_InvocationID == 0)");
3690 						begin_scope();
3691 					}
3692 
3693 					if (type_is_array && !is_control_point)
3694 					{
3695 						uint32_t indices[2] = { j, i };
3696 						auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3697 						statement(chain, " = ", lut_name, "[", j, "];");
3698 					}
3699 					else if (is_control_point)
3700 					{
3701 						uint32_t indices[2] = { invocation_id, member_index_id };
3702 						auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
3703 						statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
3704 					}
3705 					else
3706 					{
3707 						auto chain =
3708 								access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3709 						statement(chain, " = ", to_expression(c.subconstants[i]), ";");
3710 					}
3711 
3712 					if (is_patch)
3713 						end_scope();
3714 				});
3715 			}
3716 		}
3717 	}
3718 	else if (is_control_point)
3719 	{
3720 		auto lut_name = join("_", var.self, "_init");
3721 		statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type),
3722 		          " = ", to_expression(var.initializer), ";");
3723 		entry_func.fixup_hooks_in.push_back([&, lut_name]() {
3724 			statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
3725 		});
3726 	}
3727 	else if (has_decoration(var.self, DecorationBuiltIn) &&
3728 	         BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask)
3729 	{
3730 		// We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_<
3731 		entry_func.fixup_hooks_in.push_back([&] {
3732 			auto &c = this->get<SPIRConstant>(var.initializer);
3733 			uint32_t num_constants = uint32_t(c.subconstants.size());
3734 			for (uint32_t i = 0; i < num_constants; i++)
3735 			{
3736 				// Don't use to_expression on constant since it might be uint, just fish out the raw int.
3737 				statement(to_expression(var.self), "[", i, "] = ",
3738 				          convert_to_string(this->get<SPIRConstant>(c.subconstants[i]).scalar_i32()), ";");
3739 			}
3740 		});
3741 	}
3742 	else
3743 	{
3744 		auto lut_name = join("_", var.self, "_init");
3745 		statement("const ", type_to_glsl(type), " ", lut_name,
3746 		          type_to_array_glsl(type), " = ", to_expression(var.initializer), ";");
3747 		entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
3748 			if (is_patch)
3749 			{
3750 				statement("if (gl_InvocationID == 0)");
3751 				begin_scope();
3752 			}
3753 			statement(to_expression(var.self), " = ", lut_name, ";");
3754 			if (is_patch)
3755 				end_scope();
3756 		});
3757 	}
3758 }
3759 
3760 void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
3761 {
3762 	static const char *workaround_types[] = { "int",   "ivec2", "ivec3", "ivec4", "uint",   "uvec2", "uvec3", "uvec4",
3763 		                                      "float", "vec2",  "vec3",  "vec4",  "double", "dvec2", "dvec3", "dvec4" };
3764 
3765 	if (!options.vulkan_semantics)
3766 	{
3767 		using Supp = ShaderSubgroupSupportHelper;
3768 		auto result = shader_subgroup_supporter.resolve();
3769 
3770 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
3771 		{
3772 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
3773 
3774 			for (auto &e : exts)
3775 			{
3776 				const char *name = Supp::get_extension_name(e);
3777 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3778 
3779 				switch (e)
3780 				{
3781 				case Supp::NV_shader_thread_group:
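					// NV thread masks are 32-bit, so only the .x component of the 128-bit subgroup mask can be non-zero.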
3782 					statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
3783 					statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
3784 					statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
3785 					statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
3786 					statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
3787 					break;
3788 				case Supp::ARB_shader_ballot:
3789 					statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
3790 					statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
3791 					statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
3792 					statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
3793 					statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
3794 					break;
3795 				default:
3796 					break;
3797 				}
3798 			}
3799 			statement("#endif");
3800 			statement("");
3801 		}
3802 
3803 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
3804 		{
3805 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
3806 
3807 			for (auto &e : exts)
3808 			{
3809 				const char *name = Supp::get_extension_name(e);
3810 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3811 
3812 				switch (e)
3813 				{
3814 				case Supp::NV_shader_thread_group:
3815 					statement("#define gl_SubgroupSize gl_WarpSizeNV");
3816 					break;
3817 				case Supp::ARB_shader_ballot:
3818 					statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
3819 					break;
3820 				case Supp::AMD_gcn_shader:
3821 					statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
3822 					break;
3823 				default:
3824 					break;
3825 				}
3826 			}
3827 			statement("#endif");
3828 			statement("");
3829 		}
3830 
3831 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
3832 		{
3833 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
3834 
3835 			for (auto &e : exts)
3836 			{
3837 				const char *name = Supp::get_extension_name(e);
3838 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3839 
3840 				switch (e)
3841 				{
3842 				case Supp::NV_shader_thread_group:
3843 					statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
3844 					break;
3845 				case Supp::ARB_shader_ballot:
3846 					statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
3847 					break;
3848 				default:
3849 					break;
3850 				}
3851 			}
3852 			statement("#endif");
3853 			statement("");
3854 		}
3855 
3856 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
3857 		{
3858 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
3859 
3860 			for (auto &e : exts)
3861 			{
3862 				const char *name = Supp::get_extension_name(e);
3863 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3864 
3865 				switch (e)
3866 				{
3867 				case Supp::NV_shader_thread_group:
3868 					statement("#define gl_SubgroupID gl_WarpIDNV");
3869 					break;
3870 				default:
3871 					break;
3872 				}
3873 			}
3874 			statement("#endif");
3875 			statement("");
3876 		}
3877 
3878 		if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
3879 		{
3880 			auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
3881 
3882 			for (auto &e : exts)
3883 			{
3884 				const char *name = Supp::get_extension_name(e);
3885 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3886 
3887 				switch (e)
3888 				{
3889 				case Supp::NV_shader_thread_group:
3890 					statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
3891 					break;
3892 				default:
3893 					break;
3894 				}
3895 			}
3896 			statement("#endif");
3897 			statement("");
3898 		}
3899 
3900 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
3901 		{
3902 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
3903 
3904 			for (auto &e : exts)
3905 			{
3906 				const char *name = Supp::get_extension_name(e);
3907 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3908 
3909 				switch (e)
3910 				{
3911 				case Supp::NV_shader_thread_shuffle:
3912 					for (const char *t : workaround_types)
3913 					{
3914 						statement(t, " subgroupBroadcastFirst(", t,
3915 						          " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
3916 					}
3917 					for (const char *t : workaround_types)
3918 					{
3919 						statement(t, " subgroupBroadcast(", t,
3920 						          " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
3921 					}
3922 					break;
3923 				case Supp::ARB_shader_ballot:
3924 					for (const char *t : workaround_types)
3925 					{
3926 						statement(t, " subgroupBroadcastFirst(", t,
3927 						          " value) { return readFirstInvocationARB(value); }");
3928 					}
3929 					for (const char *t : workaround_types)
3930 					{
3931 						statement(t, " subgroupBroadcast(", t,
3932 						          " value, uint id) { return readInvocationARB(value, id); }");
3933 					}
3934 					break;
3935 				default:
3936 					break;
3937 				}
3938 			}
3939 			statement("#endif");
3940 			statement("");
3941 		}
3942 
3943 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
3944 		{
3945 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
3946 
3947 			for (auto &e : exts)
3948 			{
3949 				const char *name = Supp::get_extension_name(e);
3950 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3951 
3952 				switch (e)
3953 				{
3954 				case Supp::NV_shader_thread_group:
3955 					statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
3956 					statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
3957 					break;
3958 				default:
3959 					break;
3960 				}
3961 			}
3962 			statement("#else");
3963 			statement("uint subgroupBallotFindLSB(uvec4 value)");
3964 			begin_scope();
3965 			statement("int firstLive = findLSB(value.x);");
3966 			statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
3967 			end_scope();
3968 			statement("uint subgroupBallotFindMSB(uvec4 value)");
3969 			begin_scope();
3970 			statement("int firstLive = findMSB(value.y);");
3971 			statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
3972 			end_scope();
3973 			statement("#endif");
3974 			statement("");
3975 		}
3976 
3977 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
3978 		{
3979 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
3980 
3981 			for (auto &e : exts)
3982 			{
3983 				const char *name = Supp::get_extension_name(e);
3984 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3985 
3986 				switch (e)
3987 				{
3988 				case Supp::NV_gpu_shader_5:
3989 					statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
3990 					statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
3991 					statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
3992 					break;
3993 				case Supp::ARB_shader_group_vote:
3994 					statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
3995 					statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
3996 					statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
3997 					break;
3998 				case Supp::AMD_gcn_shader:
3999 					statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
4000 					statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
4001 					statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
4002 					          "b == ballotAMD(true); }");
4003 					break;
4004 				default:
4005 					break;
4006 				}
4007 			}
4008 			statement("#endif");
4009 			statement("");
4010 		}
4011 
4012 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
4013 		{
4014 			statement("#ifndef GL_KHR_shader_subgroup_vote");
4015 			statement(
4016 			    "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
4017 			    "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
4018 			for (const char *t : workaround_types)
4019 				statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
4020 			statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
4021 			statement("#endif");
4022 			statement("");
4023 		}
4024 
4025 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
4026 		{
4027 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
4028 
4029 			for (auto &e : exts)
4030 			{
4031 				const char *name = Supp::get_extension_name(e);
4032 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
4033 
4034 				switch (e)
4035 				{
4036 				case Supp::NV_shader_thread_group:
4037 					statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
4038 					break;
4039 				case Supp::ARB_shader_ballot:
4040 					statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
4041 					break;
4042 				default:
4043 					break;
4044 				}
4045 			}
4046 			statement("#endif");
4047 			statement("");
4048 		}
4049 
4050 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
4051 		{
4052 			statement("#ifndef GL_KHR_shader_subgroup_basic");
4053 			statement("bool subgroupElect()");
4054 			begin_scope();
4055 			statement("uvec4 activeMask = subgroupBallot(true);");
4056 			statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
4057 			statement("return gl_SubgroupInvocationID == firstLive;");
4058 			end_scope();
4059 			statement("#endif");
4060 			statement("");
4061 		}
4062 
4063 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
4064 		{
4065 			// Extensions we're using in place of GL_KHR_shader_subgroup_basic state
4066 			// that subgroups execute in lockstep, so this barrier is implicit.
4067 			// However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
4068 			// and a specific test of optimizing scans by leveraging lock-step invocation execution
4069 			// has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
4070 			// https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
4071 			statement("#ifndef GL_KHR_shader_subgroup_basic");
4072 			statement("void subgroupBarrier() { memoryBarrierShared(); }");
4073 			statement("#endif");
4074 			statement("");
4075 		}
4076 
4077 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
4078 		{
4079 			if (model == spv::ExecutionModelGLCompute)
4080 			{
4081 				statement("#ifndef GL_KHR_shader_subgroup_basic");
4082 				statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
4083 				statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
4084 				statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
4085 				statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
4086 				statement("#endif");
4087 			}
4088 			else
4089 			{
4090 				statement("#ifndef GL_KHR_shader_subgroup_basic");
4091 				statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
4092 				statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
4093 				statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
4094 				statement("#endif");
4095 			}
4096 			statement("");
4097 		}
4098 
4099 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
4100 		{
4101 			statement("#ifndef GL_KHR_shader_subgroup_ballot");
4102 			statement("bool subgroupInverseBallot(uvec4 value)");
4103 			begin_scope();
4104 			statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
4105 			end_scope();
4106 
4107 			statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
4108 			begin_scope();
4109 			statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
4110 			statement("ivec2 c = bitCount(v);");
4111 			statement_no_indent("#ifdef GL_NV_shader_thread_group");
4112 			statement("return uint(c.x);");
4113 			statement_no_indent("#else");
4114 			statement("return uint(c.x + c.y);");
4115 			statement_no_indent("#endif");
4116 			end_scope();
4117 
4118 			statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
4119 			begin_scope();
4120 			statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
4121 			statement("ivec2 c = bitCount(v);");
4122 			statement_no_indent("#ifdef GL_NV_shader_thread_group");
4123 			statement("return uint(c.x);");
4124 			statement_no_indent("#else");
4125 			statement("return uint(c.x + c.y);");
4126 			statement_no_indent("#endif");
4127 			end_scope();
4128 			statement("#endif");
4129 			statement("");
4130 		}
4131 
4132 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
4133 		{
4134 			statement("#ifndef GL_KHR_shader_subgroup_ballot");
4135 			statement("uint subgroupBallotBitCount(uvec4 value)");
4136 			begin_scope();
4137 			statement("ivec2 c = bitCount(value.xy);");
4138 			statement_no_indent("#ifdef GL_NV_shader_thread_group");
4139 			statement("return uint(c.x);");
4140 			statement_no_indent("#else");
4141 			statement("return uint(c.x + c.y);");
4142 			statement_no_indent("#endif");
4143 			end_scope();
4144 			statement("#endif");
4145 			statement("");
4146 		}
4147 
4148 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
4149 		{
4150 			statement("#ifndef GL_KHR_shader_subgroup_ballot");
4151 			statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
4152 			begin_scope();
4153 			statement_no_indent("#ifdef GL_NV_shader_thread_group");
4154 			statement("uint shifted = value.x >> index;");
4155 			statement_no_indent("#else");
4156 			statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
4157 			statement_no_indent("#endif");
4158 			statement("return (shifted & 1u) != 0u;");
4159 			end_scope();
4160 			statement("#endif");
4161 			statement("");
4162 		}
4163 	}
4164 
4165 	if (!workaround_ubo_load_overload_types.empty())
4166 	{
4167 		for (auto &type_id : workaround_ubo_load_overload_types)
4168 		{
4169 			auto &type = get<SPIRType>(type_id);
4170 			statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type),
4171 			          " wrap) { return wrap; }");
4172 		}
4173 		statement("");
4174 	}
4175 
4176 	if (requires_transpose_2x2)
4177 	{
4178 		statement("mat2 spvTranspose(mat2 m)");
4179 		begin_scope();
4180 		statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
4181 		end_scope();
4182 		statement("");
4183 	}
4184 
4185 	if (requires_transpose_3x3)
4186 	{
4187 		statement("mat3 spvTranspose(mat3 m)");
4188 		begin_scope();
4189 		statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
4190 		end_scope();
4191 		statement("");
4192 	}
4193 
4194 	if (requires_transpose_4x4)
4195 	{
4196 		statement("mat4 spvTranspose(mat4 m)");
4197 		begin_scope();
4198 		statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
4199 		          "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
4200 		end_scope();
4201 		statement("");
4202 	}
4203 }
4204 
4205 // Returns a string representation of the ID, usable as a function arg.
4206 // Default is to simply return the expression representation of the arg ID.
4207 // Subclasses may override to modify the return value.
4208 string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
4209 {
4210 	// Make sure that we use the name of the original variable, and not the parameter alias.
4211 	uint32_t name_id = id;
4212 	auto *var = maybe_get<SPIRVariable>(id);
4213 	if (var && var->basevariable)
4214 		name_id = var->basevariable;
4215 	return to_expression(name_id);
4216 }
4217 
4218 void CompilerGLSL::handle_invalid_expression(uint32_t id)
4219 {
4220 	// We tried to read an invalidated expression.
4221 	// This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
4222 	forced_temporaries.insert(id);
4223 	force_recompile();
4224 }
4225 
4226 // Converts the format of the current expression from packed to unpacked,
4227 // by wrapping the expression in a constructor of the appropriate type.
4228 // GLSL does not support packed formats, so simply return the expression.
4229 // Subclasses that do will override.
4230 string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
4231 {
4232 	return expr_str;
4233 }
4234 
4235 // Sometimes we proactively enclose an expression where it turns out we might not have needed it after all.
4236 void CompilerGLSL::strip_enclosed_expression(string &expr)
4237 {
4238 	if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
4239 		return;
4240 
4241 	// Have to make sure that our first and last parens actually enclose everything inside it.
4242 	uint32_t paren_count = 0;
4243 	for (auto &c : expr)
4244 	{
4245 		if (c == '(')
4246 			paren_count++;
4247 		else if (c == ')')
4248 		{
4249 			paren_count--;
4250 
4251 			// If we hit 0 and this is not the final char, our first and final parens actually don't
4252 			// enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
4253 			if (paren_count == 0 && &c != &expr.back())
4254 				return;
4255 		}
4256 	}
4257 	expr.erase(expr.size() - 1, 1);
4258 	expr.erase(begin(expr));
4259 }
4260 
4261 string CompilerGLSL::enclose_expression(const string &expr)
4262 {
4263 	bool need_parens = false;
4264 
4265 	// If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
4266 	// unary expressions.
4267 	if (!expr.empty())
4268 	{
4269 		auto c = expr.front();
4270 		if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
4271 			need_parens = true;
4272 	}
4273 
4274 	if (!need_parens)
4275 	{
4276 		uint32_t paren_count = 0;
4277 		for (auto c : expr)
4278 		{
4279 			if (c == '(' || c == '[')
4280 				paren_count++;
4281 			else if (c == ')' || c == ']')
4282 			{
4283 				assert(paren_count);
4284 				paren_count--;
4285 			}
4286 			else if (c == ' ' && paren_count == 0)
4287 			{
4288 				need_parens = true;
4289 				break;
4290 			}
4291 		}
4292 		assert(paren_count == 0);
4293 	}
4294 
4295 	// If this expression contains any spaces which are not enclosed by parentheses,
4296 	// we need to enclose it so we can treat the whole string as an expression.
4297 	// This happens when two expressions have been part of a binary op earlier.
4298 	if (need_parens)
4299 		return join('(', expr, ')');
4300 	else
4301 		return expr;
4302 }
4303 
4304 string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
4305 {
4306 	// If this expression starts with an address-of operator ('&'), then
4307 	// just return the part after the operator.
4308 	// TODO: Strip parens if unnecessary?
4309 	if (expr.front() == '&')
4310 		return expr.substr(1);
4311 	else if (backend.native_pointers)
4312 		return join('*', expr);
4313 	else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
4314 	         expr_type.pointer_depth == 1)
4315 	{
4316 		return join(enclose_expression(expr), ".value");
4317 	}
4318 	else
4319 		return expr;
4320 }
4321 
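// For illustration: "(*foo)" and "*foo" both become "foo", while a plain "foo" becomes "&foo".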
4322 string CompilerGLSL::address_of_expression(const std::string &expr)
4323 {
4324 	if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
4325 	{
4326 		// If we have an expression which looks like (*foo), taking the address of it is the same as stripping
4327 		// the first two and last characters. We might have to enclose the expression.
4328 		// This doesn't work for cases like (*foo + 10),
4329 		// but this is an r-value expression which we cannot take the address of anyway.
4330 		return enclose_expression(expr.substr(2, expr.size() - 3));
4331 	}
4332 	else if (expr.front() == '*')
4333 	{
4334 		// If this expression starts with a dereference operator ('*'), then
4335 		// just return the part after the operator.
4336 		return expr.substr(1);
4337 	}
4338 	else
4339 		return join('&', enclose_expression(expr));
4340 }
4341 
4342 // Just like to_expression except that we enclose the expression inside parentheses if needed.
4343 string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
4344 {
4345 	return enclose_expression(to_expression(id, register_expression_read));
4346 }
4347 
4348 // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
4349 // need_transpose must be forced to false.
4350 string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
4351 {
4352 	return unpack_expression_type(to_expression(id), expression_type(id),
4353 	                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4354 	                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
4355 }
4356 
4357 string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
4358 {
4359 	// If we need to transpose, it will also take care of unpacking rules.
4360 	auto *e = maybe_get<SPIRExpression>(id);
4361 	bool need_transpose = e && e->need_transpose;
4362 	bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4363 	bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4364 
4365 	if (!need_transpose && (is_remapped || is_packed))
4366 	{
4367 		return unpack_expression_type(to_expression(id, register_expression_read),
4368 		                              get_pointee_type(expression_type_id(id)),
4369 		                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4370 		                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
4371 	}
4372 	else
4373 		return to_expression(id, register_expression_read);
4374 }
4375 
4376 string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
4377 {
4378 	// If we need to transpose, it will also take care of unpacking rules.
4379 	auto *e = maybe_get<SPIRExpression>(id);
4380 	bool need_transpose = e && e->need_transpose;
4381 	bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4382 	bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4383 	if (!need_transpose && (is_remapped || is_packed))
4384 	{
4385 		return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
4386 		                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4387 		                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
4388 	}
4389 	else
4390 		return to_enclosed_expression(id, register_expression_read);
4391 }
4392 
4393 string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
4394 {
4395 	auto &type = expression_type(id);
4396 	if (type.pointer && should_dereference(id))
4397 		return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
4398 	else
4399 		return to_expression(id, register_expression_read);
4400 }
4401 
4402 string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
4403 {
4404 	auto &type = expression_type(id);
4405 	if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4406 		return address_of_expression(to_enclosed_expression(id, register_expression_read));
4407 	else
4408 		return to_unpacked_expression(id, register_expression_read);
4409 }
4410 
4411 string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
4412 {
4413 	auto &type = expression_type(id);
4414 	if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4415 		return address_of_expression(to_enclosed_expression(id, register_expression_read));
4416 	else
4417 		return to_enclosed_unpacked_expression(id, register_expression_read);
4418 }
4419 
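// For illustration: extracting component 2 yields "expr[2]" for physically packed types and "expr.z" otherwise.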
4420 string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
4421 {
4422 	auto expr = to_enclosed_expression(id);
4423 	if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
4424 		return join(expr, "[", index, "]");
4425 	else
4426 		return join(expr, ".", index_to_swizzle(index));
4427 }
4428 
4429 string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
4430                                                               const uint32_t *chain, uint32_t length)
4431 {
4432 	// It is somewhat silly if an application actually enters this path, since it knows the constant up front.
4433 	// It is useful here to extract the plain constant directly.
4434 	SPIRConstant tmp;
4435 	tmp.constant_type = result_type;
4436 	auto &composite_type = get<SPIRType>(c.constant_type);
4437 	assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
4438 	assert(!c.specialization);
4439 
4440 	if (is_matrix(composite_type))
4441 	{
4442 		if (length == 2)
4443 		{
4444 			tmp.m.c[0].vecsize = 1;
4445 			tmp.m.columns = 1;
4446 			tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
4447 		}
4448 		else
4449 		{
4450 			assert(length == 1);
4451 			tmp.m.c[0].vecsize = composite_type.vecsize;
4452 			tmp.m.columns = 1;
4453 			tmp.m.c[0] = c.m.c[chain[0]];
4454 		}
4455 	}
4456 	else
4457 	{
4458 		assert(length == 1);
4459 		tmp.m.c[0].vecsize = 1;
4460 		tmp.m.columns = 1;
4461 		tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
4462 	}
4463 
4464 	return constant_expression(tmp);
4465 }
4466 
4467 string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
4468 {
4469 	uint32_t size = to_array_size_literal(type);
4470 	auto &parent = get<SPIRType>(type.parent_type);
4471 	string expr = "{ ";
4472 
4473 	for (uint32_t i = 0; i < size; i++)
4474 	{
4475 		auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
4476 		if (parent.array.empty())
4477 			expr += subexpr;
4478 		else
4479 			expr += to_rerolled_array_expression(subexpr, parent);
4480 
4481 		if (i + 1 < size)
4482 			expr += ", ";
4483 	}
4484 
4485 	expr += " }";
4486 	return expr;
4487 }
4488 
4489 string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset)
4490 {
4491 	auto &type = expression_type(id);
4492 
4493 	bool reroll_array = !type.array.empty() && (!backend.array_is_value_type ||
4494 	                                            (uses_buffer_offset && !backend.buffer_offset_array_is_value_type));
4495 
4496 	if (reroll_array)
4497 	{
4498 		// For this case, we need to "re-roll" an array initializer from a temporary.
4499 		// We cannot simply pass the array directly, since it decays to a pointer and it cannot
4500 		// participate in a struct initializer. E.g.
4501 		// float arr[2] = { 1.0, 2.0 };
4502 		// Foo foo = { arr }; must be transformed to
4503 		// Foo foo = { { arr[0], arr[1] } };
4504 		// The array sizes cannot be deduced from specialization constants since we cannot use any loops.
4505 
4506 		// We're only triggering one read of the array expression, but this is fine since arrays have to be declared
4507 		// as temporaries anyway.
4508 		return to_rerolled_array_expression(to_enclosed_expression(id), type);
4509 	}
4510 	else
4511 		return to_unpacked_expression(id);
4512 }
4513 
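// For illustration (assuming the Vulkan GLSL backend): a nonuniform resource index such as "textures[index]"
// would typically be rewritten along the lines of "textures[nonuniformEXT(index)]" by convert_non_uniform_expression.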
4514 string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
4515 {
4516 	string expr = to_expression(id);
4517 
4518 	if (has_decoration(id, DecorationNonUniform))
4519 		convert_non_uniform_expression(expr, id);
4520 
4521 	return expr;
4522 }
4523 
4524 string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
4525 {
4526 	auto itr = invalid_expressions.find(id);
4527 	if (itr != end(invalid_expressions))
4528 		handle_invalid_expression(id);
4529 
4530 	if (ir.ids[id].get_type() == TypeExpression)
4531 	{
4532 		// We might have a more complex chain of dependencies.
4533 		// A possible scenario is that we
4534 		//
4535 		// %1 = OpLoad
4536 		// %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1.
4537 		// %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
4538 		// OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
4539 		// %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
4540 		//
4541 		// However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
4542 		// and see that we should not forward reads of the original variable.
4543 		auto &expr = get<SPIRExpression>(id);
4544 		for (uint32_t dep : expr.expression_dependencies)
4545 			if (invalid_expressions.find(dep) != end(invalid_expressions))
4546 				handle_invalid_expression(dep);
4547 	}
4548 
4549 	if (register_expression_read)
4550 		track_expression_read(id);
4551 
4552 	switch (ir.ids[id].get_type())
4553 	{
4554 	case TypeExpression:
4555 	{
4556 		auto &e = get<SPIRExpression>(id);
4557 		if (e.base_expression)
4558 			return to_enclosed_expression(e.base_expression) + e.expression;
4559 		else if (e.need_transpose)
4560 		{
4561 			// This should not be reached for access chains, since we always deal explicitly with transpose state
4562 			// when consuming an access chain expression.
4563 			uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4564 			bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4565 			return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
4566 			                                is_packed);
4567 		}
4568 		else if (flattened_structs.count(id))
4569 		{
4570 			return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
4571 		}
4572 		else
4573 		{
4574 			if (is_forcing_recompilation())
4575 			{
4576 				// During the first compilation phase, certain expression patterns can trigger exponential memory growth.
4577 				// Avoid this by returning dummy expressions during this phase.
4578 				// Do not use empty expressions here, because those are sentinels for other cases.
4579 				return "_";
4580 			}
4581 			else
4582 				return e.expression;
4583 		}
4584 	}
4585 
4586 	case TypeConstant:
4587 	{
4588 		auto &c = get<SPIRConstant>(id);
4589 		auto &type = get<SPIRType>(c.constant_type);
4590 
4591 		// WorkGroupSize may be a constant.
4592 		auto &dec = ir.meta[c.self].decoration;
4593 		if (dec.builtin)
4594 			return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
4595 		else if (c.specialization)
4596 			return to_name(id);
4597 		else if (c.is_used_as_lut)
4598 			return to_name(id);
4599 		else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
4600 			return to_name(id);
4601 		else if (!type.array.empty() && !backend.can_declare_arrays_inline)
4602 			return to_name(id);
4603 		else
4604 			return constant_expression(c);
4605 	}
4606 
4607 	case TypeConstantOp:
4608 		return to_name(id);
4609 
4610 	case TypeVariable:
4611 	{
4612 		auto &var = get<SPIRVariable>(id);
4613 		// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
4614 		// since the variable has not been declared yet.
4615 		if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
4616 			return to_expression(var.static_expression);
4617 		else if (var.deferred_declaration)
4618 		{
4619 			var.deferred_declaration = false;
4620 			return variable_decl(var);
4621 		}
4622 		else if (flattened_structs.count(id))
4623 		{
4624 			return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
4625 		}
4626 		else
4627 		{
4628 			auto &dec = ir.meta[var.self].decoration;
4629 			if (dec.builtin)
4630 				return builtin_to_glsl(dec.builtin_type, var.storage);
4631 			else
4632 				return to_name(id);
4633 		}
4634 	}
4635 
4636 	case TypeCombinedImageSampler:
4637 		// The expression of this type should never be taken directly.
4638 		// The intention is that texture sampling functions will extract the image and samplers
4639 		// separately and take their expressions as needed.
4640 		// GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
4641 		// expression a la sampler2D(texture, sampler).
4642 		SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
4643 
4644 	case TypeAccessChain:
4645 		// We cannot express this type. It only has meaning in other OpAccessChains, OpStore or OpLoad.
4646 		SPIRV_CROSS_THROW("Access chains have no default expression representation.");
4647 
4648 	default:
4649 		return to_name(id);
4650 	}
4651 }
4652 
4653 string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
4654 {
4655 	auto &type = get<SPIRType>(cop.basetype);
4656 	bool binary = false;
4657 	bool unary = false;
4658 	string op;
4659 
4660 	if (is_legacy() && is_unsigned_opcode(cop.opcode))
4661 		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
4662 
4663 	// TODO: Find a clean way to reuse emit_instruction.
4664 	switch (cop.opcode)
4665 	{
4666 	case OpSConvert:
4667 	case OpUConvert:
4668 	case OpFConvert:
4669 		op = type_to_glsl_constructor(type);
4670 		break;
4671 
4672 #define GLSL_BOP(opname, x) \
4673 	case Op##opname:        \
4674 		binary = true;      \
4675 		op = x;             \
4676 		break
4677 
4678 #define GLSL_UOP(opname, x) \
4679 	case Op##opname:        \
4680 		unary = true;       \
4681 		op = x;             \
4682 		break
4683 
4684 		GLSL_UOP(SNegate, "-");
4685 		GLSL_UOP(Not, "~");
4686 		GLSL_BOP(IAdd, "+");
4687 		GLSL_BOP(ISub, "-");
4688 		GLSL_BOP(IMul, "*");
4689 		GLSL_BOP(SDiv, "/");
4690 		GLSL_BOP(UDiv, "/");
4691 		GLSL_BOP(UMod, "%");
4692 		GLSL_BOP(SMod, "%");
4693 		GLSL_BOP(ShiftRightLogical, ">>");
4694 		GLSL_BOP(ShiftRightArithmetic, ">>");
4695 		GLSL_BOP(ShiftLeftLogical, "<<");
4696 		GLSL_BOP(BitwiseOr, "|");
4697 		GLSL_BOP(BitwiseXor, "^");
4698 		GLSL_BOP(BitwiseAnd, "&");
4699 		GLSL_BOP(LogicalOr, "||");
4700 		GLSL_BOP(LogicalAnd, "&&");
4701 		GLSL_UOP(LogicalNot, "!");
4702 		GLSL_BOP(LogicalEqual, "==");
4703 		GLSL_BOP(LogicalNotEqual, "!=");
4704 		GLSL_BOP(IEqual, "==");
4705 		GLSL_BOP(INotEqual, "!=");
4706 		GLSL_BOP(ULessThan, "<");
4707 		GLSL_BOP(SLessThan, "<");
4708 		GLSL_BOP(ULessThanEqual, "<=");
4709 		GLSL_BOP(SLessThanEqual, "<=");
4710 		GLSL_BOP(UGreaterThan, ">");
4711 		GLSL_BOP(SGreaterThan, ">");
4712 		GLSL_BOP(UGreaterThanEqual, ">=");
4713 		GLSL_BOP(SGreaterThanEqual, ">=");
4714 
4715 	case OpSelect:
4716 	{
4717 		if (cop.arguments.size() < 3)
4718 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4719 
4720 		// This one is pretty annoying. It's triggered from
4721 		// uint(bool), int(bool) from spec constants.
4722 		// In order to preserve its compile-time constness in Vulkan GLSL,
4723 		// we need to reduce the OpSelect expression back to this simplified model.
4724 		// If we cannot, fail.
4725 		if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
4726 		{
4727 			// Implement as a simple cast down below.
4728 		}
4729 		else
4730 		{
4731 			// Implement a ternary and pray the compiler understands it :)
4732 			return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
4733 		}
4734 		break;
4735 	}
4736 
4737 	case OpVectorShuffle:
4738 	{
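		// For illustration: shuffling two vec2 arguments a and b with indices {0, 3}
		// would build "vec2(a.x, b.y)" below.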
4739 		string expr = type_to_glsl_constructor(type);
4740 		expr += "(";
4741 
4742 		uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
4743 		string left_arg = to_enclosed_expression(cop.arguments[0]);
4744 		string right_arg = to_enclosed_expression(cop.arguments[1]);
4745 
4746 		for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
4747 		{
4748 			uint32_t index = cop.arguments[i];
4749 			if (index >= left_components)
4750 				expr += right_arg + "." + "xyzw"[index - left_components];
4751 			else
4752 				expr += left_arg + "." + "xyzw"[index];
4753 
4754 			if (i + 1 < uint32_t(cop.arguments.size()))
4755 				expr += ", ";
4756 		}
4757 
4758 		expr += ")";
4759 		return expr;
4760 	}
4761 
4762 	case OpCompositeExtract:
4763 	{
4764 		auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
4765 		                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
4766 		return expr;
4767 	}
4768 
4769 	case OpCompositeInsert:
4770 		SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
4771 
4772 	default:
4773 		// Some opcodes are unimplemented here; these are currently not possible to test from glslang.
4774 		SPIRV_CROSS_THROW("Unimplemented spec constant op.");
4775 	}
4776 
4777 	uint32_t bit_width = 0;
4778 	if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4779 		bit_width = expression_type(cop.arguments[0]).width;
4780 
4781 	SPIRType::BaseType input_type;
4782 	bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
4783 
4784 	switch (cop.opcode)
4785 	{
4786 	case OpIEqual:
4787 	case OpINotEqual:
4788 		input_type = to_signed_basetype(bit_width);
4789 		break;
4790 
4791 	case OpSLessThan:
4792 	case OpSLessThanEqual:
4793 	case OpSGreaterThan:
4794 	case OpSGreaterThanEqual:
4795 	case OpSMod:
4796 	case OpSDiv:
4797 	case OpShiftRightArithmetic:
4798 	case OpSConvert:
4799 	case OpSNegate:
4800 		input_type = to_signed_basetype(bit_width);
4801 		break;
4802 
4803 	case OpULessThan:
4804 	case OpULessThanEqual:
4805 	case OpUGreaterThan:
4806 	case OpUGreaterThanEqual:
4807 	case OpUMod:
4808 	case OpUDiv:
4809 	case OpShiftRightLogical:
4810 	case OpUConvert:
4811 		input_type = to_unsigned_basetype(bit_width);
4812 		break;
4813 
4814 	default:
4815 		input_type = type.basetype;
4816 		break;
4817 	}
4818 
4819 #undef GLSL_BOP
4820 #undef GLSL_UOP
4821 	if (binary)
4822 	{
4823 		if (cop.arguments.size() < 2)
4824 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4825 
4826 		string cast_op0;
4827 		string cast_op1;
4828 		auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
4829 		                                              cop.arguments[1], skip_cast_if_equal_type);
4830 
4831 		if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
4832 		{
4833 			expected_type.basetype = input_type;
4834 			auto expr = bitcast_glsl_op(type, expected_type);
4835 			expr += '(';
4836 			expr += join(cast_op0, " ", op, " ", cast_op1);
4837 			expr += ')';
4838 			return expr;
4839 		}
4840 		else
4841 			return join("(", cast_op0, " ", op, " ", cast_op1, ")");
4842 	}
4843 	else if (unary)
4844 	{
4845 		if (cop.arguments.size() < 1)
4846 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4847 
4848 		// Auto-bitcast to result type as needed.
4849 		// Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
4850 		return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
4851 	}
4852 	else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4853 	{
4854 		if (cop.arguments.size() < 1)
4855 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4856 
4857 		auto &arg_type = expression_type(cop.arguments[0]);
4858 		if (arg_type.width < type.width && input_type != arg_type.basetype)
4859 		{
4860 			auto expected = arg_type;
4861 			expected.basetype = input_type;
4862 			return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
4863 		}
4864 		else
4865 			return join(op, "(", to_expression(cop.arguments[0]), ")");
4866 	}
4867 	else
4868 	{
4869 		if (cop.arguments.size() < 1)
4870 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4871 		return join(op, "(", to_expression(cop.arguments[0]), ")");
4872 	}
4873 }
4874 
4875 string CompilerGLSL::constant_expression(const SPIRConstant &c)
4876 {
4877 	auto &type = get<SPIRType>(c.constant_type);
4878 
4879 	if (type.pointer)
4880 	{
4881 		return backend.null_pointer_literal;
4882 	}
4883 	else if (!c.subconstants.empty())
4884 	{
4885 		// Handles Arrays and structures.
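		// For illustration (the exact form depends on the backend flags below): a struct constant Foo { 1.0, 2.0 }
		// is emitted as "Foo(1.0, 2.0)" with constructor syntax, as "{ 1.0, 2.0 }" with plain initializer lists,
		// or as "Foo{ 1.0, 2.0 }" when typed initializer lists are requested (e.g. MSL).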
4886 		string res;
4887 
4888 		// Allow Metal to use the array<T> template to make arrays a value type
4889 		bool needs_trailing_bracket = false;
4890 		if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
4891 		    type.array.empty())
4892 		{
4893 			res = type_to_glsl_constructor(type) + "{ ";
4894 		}
4895 		else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
4896 		         !type.array.empty())
4897 		{
4898 			res = type_to_glsl_constructor(type) + "({ ";
4899 			needs_trailing_bracket = true;
4900 		}
4901 		else if (backend.use_initializer_list)
4902 		{
4903 			res = "{ ";
4904 		}
4905 		else
4906 		{
4907 			res = type_to_glsl_constructor(type) + "(";
4908 		}
4909 
4910 		for (auto &elem : c.subconstants)
4911 		{
4912 			auto &subc = get<SPIRConstant>(elem);
4913 			if (subc.specialization)
4914 				res += to_name(elem);
4915 			else
4916 				res += constant_expression(subc);
4917 
4918 			if (&elem != &c.subconstants.back())
4919 				res += ", ";
4920 		}
4921 
4922 		res += backend.use_initializer_list ? " }" : ")";
4923 		if (needs_trailing_bracket)
4924 			res += ")";
4925 
4926 		return res;
4927 	}
4928 	else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
4929 	{
4930 		// Metal tessellation likes empty structs which are then constant expressions.
4931 		if (backend.supports_empty_struct)
4932 			return "{ }";
4933 		else if (backend.use_typed_initializer_list)
4934 			return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }");
4935 		else if (backend.use_initializer_list)
4936 			return "{ 0 }";
4937 		else
4938 			return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)");
4939 	}
4940 	else if (c.columns() == 1)
4941 	{
4942 		return constant_expression_vector(c, 0);
4943 	}
4944 	else
4945 	{
4946 		string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
4947 		for (uint32_t col = 0; col < c.columns(); col++)
4948 		{
4949 			if (c.specialization_constant_id(col) != 0)
4950 				res += to_name(c.specialization_constant_id(col));
4951 			else
4952 				res += constant_expression_vector(c, col);
4953 
4954 			if (col + 1 < c.columns())
4955 				res += ", ";
4956 		}
4957 		res += ")";
4958 		return res;
4959 	}
4960 }
4961 
4962 #ifdef _MSC_VER
4963 // sprintf warning.
4964 // We cannot rely on snprintf existing because, ..., MSVC.
4965 #pragma warning(push)
4966 #pragma warning(disable : 4996)
4967 #endif
4968 
4969 string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
4970 {
4971 	string res;
4972 	float float_value = c.scalar_f16(col, row);
4973 
4974 	// There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
4975 	// of complicated workarounds, just value-cast to the half type always.
4976 	if (std::isnan(float_value) || std::isinf(float_value))
4977 	{
4978 		SPIRType type;
4979 		type.basetype = SPIRType::Half;
4980 		type.vecsize = 1;
4981 		type.columns = 1;
4982 
4983 		if (float_value == numeric_limits<float>::infinity())
4984 			res = join(type_to_glsl(type), "(1.0 / 0.0)");
4985 		else if (float_value == -numeric_limits<float>::infinity())
4986 			res = join(type_to_glsl(type), "(-1.0 / 0.0)");
4987 		else if (std::isnan(float_value))
4988 			res = join(type_to_glsl(type), "(0.0 / 0.0)");
4989 		else
4990 			SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
4991 	}
4992 	else
4993 	{
4994 		SPIRType type;
4995 		type.basetype = SPIRType::Half;
4996 		type.vecsize = 1;
4997 		type.columns = 1;
4998 		res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
4999 	}
5000 
5001 	return res;
5002 }
5003 
5004 string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5005 {
5006 	string res;
5007 	float float_value = c.scalar_f32(col, row);
5008 
5009 	if (std::isnan(float_value) || std::isinf(float_value))
5010 	{
5011 		// Use special representation.
5012 		if (!is_legacy())
5013 		{
5014 			SPIRType out_type;
5015 			SPIRType in_type;
5016 			out_type.basetype = SPIRType::Float;
5017 			in_type.basetype = SPIRType::UInt;
5018 			out_type.vecsize = 1;
5019 			in_type.vecsize = 1;
5020 			out_type.width = 32;
5021 			in_type.width = 32;
5022 
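			// For illustration: a quiet NaN constant might be emitted as "uintBitsToFloat(0x7fc00000u /* nan */)",
			// preserving the exact bit pattern of the SPIR-V constant (the bit pattern shown is illustrative).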
5023 			char print_buffer[32];
5024 			sprintf(print_buffer, "0x%xu", c.scalar(col, row));
5025 
5026 			const char *comment = "inf";
5027 			if (float_value == -numeric_limits<float>::infinity())
5028 				comment = "-inf";
5029 			else if (std::isnan(float_value))
5030 				comment = "nan";
5031 			res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
5032 		}
5033 		else
5034 		{
5035 			if (float_value == numeric_limits<float>::infinity())
5036 			{
5037 				if (backend.float_literal_suffix)
5038 					res = "(1.0f / 0.0f)";
5039 				else
5040 					res = "(1.0 / 0.0)";
5041 			}
5042 			else if (float_value == -numeric_limits<float>::infinity())
5043 			{
5044 				if (backend.float_literal_suffix)
5045 					res = "(-1.0f / 0.0f)";
5046 				else
5047 					res = "(-1.0 / 0.0)";
5048 			}
5049 			else if (std::isnan(float_value))
5050 			{
5051 				if (backend.float_literal_suffix)
5052 					res = "(0.0f / 0.0f)";
5053 				else
5054 					res = "(0.0 / 0.0)";
5055 			}
5056 			else
5057 				SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5058 		}
5059 	}
5060 	else
5061 	{
5062 		res = convert_to_string(float_value, current_locale_radix_character);
5063 		if (backend.float_literal_suffix)
5064 			res += "f";
5065 	}
5066 
5067 	return res;
5068 }
5069 
5070 std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5071 {
5072 	string res;
5073 	double double_value = c.scalar_f64(col, row);
5074 
5075 	if (std::isnan(double_value) || std::isinf(double_value))
5076 	{
5077 		// Use special representation.
5078 		if (!is_legacy())
5079 		{
5080 			SPIRType out_type;
5081 			SPIRType in_type;
5082 			out_type.basetype = SPIRType::Double;
5083 			in_type.basetype = SPIRType::UInt64;
5084 			out_type.vecsize = 1;
5085 			in_type.vecsize = 1;
5086 			out_type.width = 64;
5087 			in_type.width = 64;
5088 
5089 			uint64_t u64_value = c.scalar_u64(col, row);
5090 
5091 			if (options.es)
5092 				SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
5093 			require_extension_internal("GL_ARB_gpu_shader_int64");
5094 
5095 			char print_buffer[64];
5096 			sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
5097 			        backend.long_long_literal_suffix ? "ull" : "ul");
5098 
5099 			const char *comment = "inf";
5100 			if (double_value == -numeric_limits<double>::infinity())
5101 				comment = "-inf";
5102 			else if (std::isnan(double_value))
5103 				comment = "nan";
5104 			res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
5105 		}
5106 		else
5107 		{
5108 			if (options.es)
5109 				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
5110 			if (options.version < 400)
5111 				require_extension_internal("GL_ARB_gpu_shader_fp64");
5112 
5113 			if (double_value == numeric_limits<double>::infinity())
5114 			{
5115 				if (backend.double_literal_suffix)
5116 					res = "(1.0lf / 0.0lf)";
5117 				else
5118 					res = "(1.0 / 0.0)";
5119 			}
5120 			else if (double_value == -numeric_limits<double>::infinity())
5121 			{
5122 				if (backend.double_literal_suffix)
5123 					res = "(-1.0lf / 0.0lf)";
5124 				else
5125 					res = "(-1.0 / 0.0)";
5126 			}
5127 			else if (std::isnan(double_value))
5128 			{
5129 				if (backend.double_literal_suffix)
5130 					res = "(0.0lf / 0.0lf)";
5131 				else
5132 					res = "(0.0 / 0.0)";
5133 			}
5134 			else
5135 				SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5136 		}
5137 	}
5138 	else
5139 	{
5140 		res = convert_to_string(double_value, current_locale_radix_character);
5141 		if (backend.double_literal_suffix)
5142 			res += "lf";
5143 	}
5144 
5145 	return res;
5146 }
5147 
5148 #ifdef _MSC_VER
5149 #pragma warning(pop)
5150 #endif
5151 
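// For illustration: a vec4 constant with all components equal to 1.0 may be emitted as a splat, "vec4(1.0)",
// or, on backends that can swizzle scalars, as something like "1.0f.xxxx"; otherwise each component is
// printed individually, e.g. "vec4(1.0, 2.0, 3.0, 4.0)".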
5152 string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
5153 {
5154 	auto type = get<SPIRType>(c.constant_type);
5155 	type.columns = 1;
5156 
5157 	auto scalar_type = type;
5158 	scalar_type.vecsize = 1;
5159 
5160 	string res;
5161 	bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
5162 	bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
5163 
5164 	if (!type_is_floating_point(type))
5165 	{
5166 		// Cannot swizzle literal integers as a special case.
5167 		swizzle_splat = false;
5168 	}
5169 
5170 	if (splat || swizzle_splat)
5171 	{
5172 		// Cannot use constant splatting if we have specialization constants somewhere in the vector.
5173 		for (uint32_t i = 0; i < c.vector_size(); i++)
5174 		{
5175 			if (c.specialization_constant_id(vector, i) != 0)
5176 			{
5177 				splat = false;
5178 				swizzle_splat = false;
5179 				break;
5180 			}
5181 		}
5182 	}
5183 
5184 	if (splat || swizzle_splat)
5185 	{
5186 		if (type.width == 64)
5187 		{
5188 			uint64_t ident = c.scalar_u64(vector, 0);
5189 			for (uint32_t i = 1; i < c.vector_size(); i++)
5190 			{
5191 				if (ident != c.scalar_u64(vector, i))
5192 				{
5193 					splat = false;
5194 					swizzle_splat = false;
5195 					break;
5196 				}
5197 			}
5198 		}
5199 		else
5200 		{
5201 			uint32_t ident = c.scalar(vector, 0);
5202 			for (uint32_t i = 1; i < c.vector_size(); i++)
5203 			{
5204 				if (ident != c.scalar(vector, i))
5205 				{
5206 					splat = false;
5207 					swizzle_splat = false;
5208 				}
5209 			}
5210 		}
5211 	}
5212 
5213 	if (c.vector_size() > 1 && !swizzle_splat)
5214 		res += type_to_glsl(type) + "(";
5215 
5216 	switch (type.basetype)
5217 	{
5218 	case SPIRType::Half:
5219 		if (splat || swizzle_splat)
5220 		{
5221 			res += convert_half_to_string(c, vector, 0);
5222 			if (swizzle_splat)
5223 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5224 		}
5225 		else
5226 		{
5227 			for (uint32_t i = 0; i < c.vector_size(); i++)
5228 			{
5229 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5230 					res += to_name(c.specialization_constant_id(vector, i));
5231 				else
5232 					res += convert_half_to_string(c, vector, i);
5233 
5234 				if (i + 1 < c.vector_size())
5235 					res += ", ";
5236 			}
5237 		}
5238 		break;
5239 
5240 	case SPIRType::Float:
5241 		if (splat || swizzle_splat)
5242 		{
5243 			res += convert_float_to_string(c, vector, 0);
5244 			if (swizzle_splat)
5245 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5246 		}
5247 		else
5248 		{
5249 			for (uint32_t i = 0; i < c.vector_size(); i++)
5250 			{
5251 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5252 					res += to_name(c.specialization_constant_id(vector, i));
5253 				else
5254 					res += convert_float_to_string(c, vector, i);
5255 
5256 				if (i + 1 < c.vector_size())
5257 					res += ", ";
5258 			}
5259 		}
5260 		break;
5261 
5262 	case SPIRType::Double:
5263 		if (splat || swizzle_splat)
5264 		{
5265 			res += convert_double_to_string(c, vector, 0);
5266 			if (swizzle_splat)
5267 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5268 		}
5269 		else
5270 		{
5271 			for (uint32_t i = 0; i < c.vector_size(); i++)
5272 			{
5273 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5274 					res += to_name(c.specialization_constant_id(vector, i));
5275 				else
5276 					res += convert_double_to_string(c, vector, i);
5277 
5278 				if (i + 1 < c.vector_size())
5279 					res += ", ";
5280 			}
5281 		}
5282 		break;
5283 
5284 	case SPIRType::Int64:
5285 		if (splat)
5286 		{
5287 			res += convert_to_string(c.scalar_i64(vector, 0));
5288 			if (backend.long_long_literal_suffix)
5289 				res += "ll";
5290 			else
5291 				res += "l";
5292 		}
5293 		else
5294 		{
5295 			for (uint32_t i = 0; i < c.vector_size(); i++)
5296 			{
5297 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5298 					res += to_name(c.specialization_constant_id(vector, i));
5299 				else
5300 				{
5301 					res += convert_to_string(c.scalar_i64(vector, i));
5302 					if (backend.long_long_literal_suffix)
5303 						res += "ll";
5304 					else
5305 						res += "l";
5306 				}
5307 
5308 				if (i + 1 < c.vector_size())
5309 					res += ", ";
5310 			}
5311 		}
5312 		break;
5313 
5314 	case SPIRType::UInt64:
5315 		if (splat)
5316 		{
5317 			res += convert_to_string(c.scalar_u64(vector, 0));
5318 			if (backend.long_long_literal_suffix)
5319 				res += "ull";
5320 			else
5321 				res += "ul";
5322 		}
5323 		else
5324 		{
5325 			for (uint32_t i = 0; i < c.vector_size(); i++)
5326 			{
5327 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5328 					res += to_name(c.specialization_constant_id(vector, i));
5329 				else
5330 				{
5331 					res += convert_to_string(c.scalar_u64(vector, i));
5332 					if (backend.long_long_literal_suffix)
5333 						res += "ull";
5334 					else
5335 						res += "ul";
5336 				}
5337 
5338 				if (i + 1 < c.vector_size())
5339 					res += ", ";
5340 			}
5341 		}
5342 		break;
5343 
5344 	case SPIRType::UInt:
5345 		if (splat)
5346 		{
5347 			res += convert_to_string(c.scalar(vector, 0));
5348 			if (is_legacy())
5349 			{
5350 				// Fake unsigned constant literals with signed ones if possible.
5351 				// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5352 				if (c.scalar_i32(vector, 0) < 0)
5353 					SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
5354 			}
5355 			else if (backend.uint32_t_literal_suffix)
5356 				res += "u";
5357 		}
5358 		else
5359 		{
5360 			for (uint32_t i = 0; i < c.vector_size(); i++)
5361 			{
5362 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5363 					res += to_name(c.specialization_constant_id(vector, i));
5364 				else
5365 				{
5366 					res += convert_to_string(c.scalar(vector, i));
5367 					if (is_legacy())
5368 					{
5369 						// Fake unsigned constant literals with signed ones if possible.
5370 						// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5371 						if (c.scalar_i32(vector, i) < 0)
5372 							SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
5373 							                  "the literal negative.");
5374 					}
5375 					else if (backend.uint32_t_literal_suffix)
5376 						res += "u";
5377 				}
5378 
5379 				if (i + 1 < c.vector_size())
5380 					res += ", ";
5381 			}
5382 		}
5383 		break;
5384 
5385 	case SPIRType::Int:
5386 		if (splat)
5387 			res += convert_to_string(c.scalar_i32(vector, 0));
5388 		else
5389 		{
5390 			for (uint32_t i = 0; i < c.vector_size(); i++)
5391 			{
5392 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5393 					res += to_name(c.specialization_constant_id(vector, i));
5394 				else
5395 					res += convert_to_string(c.scalar_i32(vector, i));
5396 				if (i + 1 < c.vector_size())
5397 					res += ", ";
5398 			}
5399 		}
5400 		break;
5401 
5402 	case SPIRType::UShort:
5403 		if (splat)
5404 		{
5405 			res += convert_to_string(c.scalar(vector, 0));
5406 		}
5407 		else
5408 		{
5409 			for (uint32_t i = 0; i < c.vector_size(); i++)
5410 			{
5411 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5412 					res += to_name(c.specialization_constant_id(vector, i));
5413 				else
5414 				{
5415 					if (*backend.uint16_t_literal_suffix)
5416 					{
5417 						res += convert_to_string(c.scalar_u16(vector, i));
5418 						res += backend.uint16_t_literal_suffix;
5419 					}
5420 					else
5421 					{
5422 						// If backend doesn't have a literal suffix, we need to value cast.
5423 						res += type_to_glsl(scalar_type);
5424 						res += "(";
5425 						res += convert_to_string(c.scalar_u16(vector, i));
5426 						res += ")";
5427 					}
5428 				}
5429 
5430 				if (i + 1 < c.vector_size())
5431 					res += ", ";
5432 			}
5433 		}
5434 		break;
5435 
5436 	case SPIRType::Short:
5437 		if (splat)
5438 		{
5439 			res += convert_to_string(c.scalar_i16(vector, 0));
5440 		}
5441 		else
5442 		{
5443 			for (uint32_t i = 0; i < c.vector_size(); i++)
5444 			{
5445 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5446 					res += to_name(c.specialization_constant_id(vector, i));
5447 				else
5448 				{
5449 					if (*backend.int16_t_literal_suffix)
5450 					{
5451 						res += convert_to_string(c.scalar_i16(vector, i));
5452 						res += backend.int16_t_literal_suffix;
5453 					}
5454 					else
5455 					{
5456 						// If backend doesn't have a literal suffix, we need to value cast.
5457 						res += type_to_glsl(scalar_type);
5458 						res += "(";
5459 						res += convert_to_string(c.scalar_i16(vector, i));
5460 						res += ")";
5461 					}
5462 				}
5463 
5464 				if (i + 1 < c.vector_size())
5465 					res += ", ";
5466 			}
5467 		}
5468 		break;
5469 
5470 	case SPIRType::UByte:
5471 		if (splat)
5472 		{
5473 			res += convert_to_string(c.scalar_u8(vector, 0));
5474 		}
5475 		else
5476 		{
5477 			for (uint32_t i = 0; i < c.vector_size(); i++)
5478 			{
5479 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5480 					res += to_name(c.specialization_constant_id(vector, i));
5481 				else
5482 				{
5483 					res += type_to_glsl(scalar_type);
5484 					res += "(";
5485 					res += convert_to_string(c.scalar_u8(vector, i));
5486 					res += ")";
5487 				}
5488 
5489 				if (i + 1 < c.vector_size())
5490 					res += ", ";
5491 			}
5492 		}
5493 		break;
5494 
5495 	case SPIRType::SByte:
5496 		if (splat)
5497 		{
5498 			res += convert_to_string(c.scalar_i8(vector, 0));
5499 		}
5500 		else
5501 		{
5502 			for (uint32_t i = 0; i < c.vector_size(); i++)
5503 			{
5504 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5505 					res += to_name(c.specialization_constant_id(vector, i));
5506 				else
5507 				{
5508 					res += type_to_glsl(scalar_type);
5509 					res += "(";
5510 					res += convert_to_string(c.scalar_i8(vector, i));
5511 					res += ")";
5512 				}
5513 
5514 				if (i + 1 < c.vector_size())
5515 					res += ", ";
5516 			}
5517 		}
5518 		break;
5519 
5520 	case SPIRType::Boolean:
5521 		if (splat)
5522 			res += c.scalar(vector, 0) ? "true" : "false";
5523 		else
5524 		{
5525 			for (uint32_t i = 0; i < c.vector_size(); i++)
5526 			{
5527 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5528 					res += to_name(c.specialization_constant_id(vector, i));
5529 				else
5530 					res += c.scalar(vector, i) ? "true" : "false";
5531 
5532 				if (i + 1 < c.vector_size())
5533 					res += ", ";
5534 			}
5535 		}
5536 		break;
5537 
5538 	default:
5539 		SPIRV_CROSS_THROW("Invalid constant expression basetype.");
5540 	}
5541 
5542 	if (c.vector_size() > 1 && !swizzle_splat)
5543 		res += ")";
5544 
5545 	return res;
5546 }
5547 
5548 SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
5549 {
5550 	forced_temporaries.insert(id);
5551 	emit_uninitialized_temporary(type, id);
5552 	return set<SPIRExpression>(id, to_name(id), type, true);
5553 }
5554 
5555 void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
5556 {
5557 	// If we're declaring temporaries inside continue blocks,
5558 	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
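	// For illustration: a temporary first needed inside a continue block is hoisted so that it is declared
	// up front at the loop header, e.g. "vec4 _37;", and the continue block only assigns to it
	// (the name "_37" is illustrative).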
5559 	if (current_continue_block && !hoisted_temporaries.count(result_id))
5560 	{
5561 		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5562 		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5563 		            [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5564 			            return tmp.first == result_type && tmp.second == result_id;
5565 		            }) == end(header.declare_temporary))
5566 		{
5567 			header.declare_temporary.emplace_back(result_type, result_id);
5568 			hoisted_temporaries.insert(result_id);
5569 			force_recompile();
5570 		}
5571 	}
5572 	else if (hoisted_temporaries.count(result_id) == 0)
5573 	{
5574 		auto &type = get<SPIRType>(result_type);
5575 		auto &flags = ir.meta[result_id].decoration.decoration_flags;
5576 
5577 		// The result_id has not been made into an expression yet, so use flags interface.
5578 		add_local_variable_name(result_id);
5579 
5580 		string initializer;
5581 		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
5582 			initializer = join(" = ", to_zero_initialized_expression(result_type));
5583 
5584 		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
5585 	}
5586 }
5587 
5588 string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
5589 {
5590 	auto &type = get<SPIRType>(result_type);
5591 	auto &flags = ir.meta[result_id].decoration.decoration_flags;
5592 
5593 	// If we're declaring temporaries inside continue blocks,
5594 	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5595 	if (current_continue_block && !hoisted_temporaries.count(result_id))
5596 	{
5597 		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5598 		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5599 		            [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5600 			            return tmp.first == result_type && tmp.second == result_id;
5601 		            }) == end(header.declare_temporary))
5602 		{
5603 			header.declare_temporary.emplace_back(result_type, result_id);
5604 			hoisted_temporaries.insert(result_id);
5605 			force_recompile();
5606 		}
5607 
5608 		return join(to_name(result_id), " = ");
5609 	}
5610 	else if (hoisted_temporaries.count(result_id))
5611 	{
5612 		// The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
5613 		return join(to_name(result_id), " = ");
5614 	}
5615 	else
5616 	{
5617 		// The result_id has not been made into an expression yet, so use flags interface.
5618 		add_local_variable_name(result_id);
5619 		return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
5620 	}
5621 }
5622 
5623 bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
5624 {
5625 	return forwarded_temporaries.count(id) != 0;
5626 }
5627 
5628 bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
5629 {
5630 	return suppressed_usage_tracking.count(id) != 0;
5631 }
5632 
5633 bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
5634 {
5635 	auto *expr = maybe_get<SPIRExpression>(id);
5636 	if (!expr)
5637 		return false;
5638 
5639 	// If we're emitting code at a deeper loop level than when we emitted the expression,
5640 	// we're probably reading the same expression over and over.
5641 	return current_loop_level > expr->emitted_loop_level;
5642 }
5643 
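// For illustration: when the expression can be forwarded, emit_op only records the expression string
// (e.g. "a + b") so it can be inlined at its use sites; otherwise it emits a temporary such as
// "vec4 _25 = a + b;" and the result id refers to that temporary from then on (the name "_25" is illustrative).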
5644 SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
5645                                       bool suppress_usage_tracking)
5646 {
5647 	if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
5648 	{
5649 		// Just forward it without temporary.
5650 		// If the forward is trivial, we do not force flushing to temporary for this expression.
5651 		forwarded_temporaries.insert(result_id);
5652 		if (suppress_usage_tracking)
5653 			suppressed_usage_tracking.insert(result_id);
5654 
5655 		return set<SPIRExpression>(result_id, rhs, result_type, true);
5656 	}
5657 	else
5658 	{
5659 		// If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
5660 		statement(declare_temporary(result_type, result_id), rhs, ";");
5661 		return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
5662 	}
5663 }
5664 
5665 void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5666 {
5667 	bool forward = should_forward(op0);
5668 	emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
5669 	inherit_expression_dependencies(result_id, op0);
5670 }
5671 
5672 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
5673 {
5674 	// Various FP arithmetic opcodes such as add, sub, mul will hit this.
5675 	bool force_temporary_precise = backend.support_precise_qualifier &&
5676 	                               has_decoration(result_id, DecorationNoContraction) &&
5677 	                               type_is_floating_point(get<SPIRType>(result_type));
5678 	bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;
5679 
5680 	emit_op(result_type, result_id,
5681 	        join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
5682 
5683 	inherit_expression_dependencies(result_id, op0);
5684 	inherit_expression_dependencies(result_id, op1);
5685 }
5686 
5687 void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
5688 {
5689 	auto &type = get<SPIRType>(result_type);
5690 	auto expr = type_to_glsl_constructor(type);
5691 	expr += '(';
5692 	for (uint32_t i = 0; i < type.vecsize; i++)
5693 	{
5694 		// Make sure to call to_expression multiple times to ensure
5695 		// that these expressions are properly flushed to temporaries if needed.
5696 		expr += op;
5697 		expr += to_extract_component_expression(operand, i);
5698 
5699 		if (i + 1 < type.vecsize)
5700 			expr += ", ";
5701 	}
5702 	expr += ')';
5703 	emit_op(result_type, result_id, expr, should_forward(operand));
5704 
5705 	inherit_expression_dependencies(result_id, operand);
5706 }
5707 
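// For illustration: unrolling a component-wise comparison on vec2 operands would build
// something like "bvec2(a.x < b.x, a.y < b.y)", with each component wrapped in "!(...)" when negate is set.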
5708 void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5709                                            const char *op, bool negate, SPIRType::BaseType expected_type)
5710 {
5711 	auto &type0 = expression_type(op0);
5712 	auto &type1 = expression_type(op1);
5713 
5714 	SPIRType target_type0 = type0;
5715 	SPIRType target_type1 = type1;
5716 	target_type0.basetype = expected_type;
5717 	target_type1.basetype = expected_type;
5718 	target_type0.vecsize = 1;
5719 	target_type1.vecsize = 1;
5720 
5721 	auto &type = get<SPIRType>(result_type);
5722 	auto expr = type_to_glsl_constructor(type);
5723 	expr += '(';
5724 	for (uint32_t i = 0; i < type.vecsize; i++)
5725 	{
5726 		// Make sure to call to_expression multiple times to ensure
5727 		// that these expressions are properly flushed to temporaries if needed.
5728 		if (negate)
5729 			expr += "!(";
5730 
5731 		if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
5732 			expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
5733 		else
5734 			expr += to_extract_component_expression(op0, i);
5735 
5736 		expr += ' ';
5737 		expr += op;
5738 		expr += ' ';
5739 
5740 		if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
5741 			expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
5742 		else
5743 			expr += to_extract_component_expression(op1, i);
5744 
5745 		if (negate)
5746 			expr += ")";
5747 
5748 		if (i + 1 < type.vecsize)
5749 			expr += ", ";
5750 	}
5751 	expr += ')';
5752 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5753 
5754 	inherit_expression_dependencies(result_id, op0);
5755 	inherit_expression_dependencies(result_id, op1);
5756 }
5757 
5758 SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
5759                                                 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
5760 {
5761 	auto &type0 = expression_type(op0);
5762 	auto &type1 = expression_type(op1);
5763 
5764 	// We have to bitcast if our inputs are of different types, or if our types do not match the expected input type.
5765 	// For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
5766 	// since equality test is exactly the same.
5767 	bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
5768 
5769 	// Create a fake type so we can bitcast to it.
5770 	// We only deal with regular arithmetic types here like int, uints and so on.
5771 	SPIRType expected_type;
5772 	expected_type.basetype = input_type;
5773 	expected_type.vecsize = type0.vecsize;
5774 	expected_type.columns = type0.columns;
5775 	expected_type.width = type0.width;
5776 
5777 	if (cast)
5778 	{
5779 		cast_op0 = bitcast_glsl(expected_type, op0);
5780 		cast_op1 = bitcast_glsl(expected_type, op1);
5781 	}
5782 	else
5783 	{
5784 		// If we don't cast, our actual input type is that of the first (or second) argument.
5785 		cast_op0 = to_enclosed_unpacked_expression(op0);
5786 		cast_op1 = to_enclosed_unpacked_expression(op1);
5787 		input_type = type0.basetype;
5788 	}
5789 
5790 	return expected_type;
5791 }
5792 
5793 bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
5794 {
5795 	// Some bitcasts may require complex casting sequences, and are implemented here.
5796 	// Otherwise a simple unary function will do with bitcast_glsl_op.
5797 
5798 	auto &output_type = get<SPIRType>(result_type);
5799 	auto &input_type = expression_type(op0);
5800 	string expr;
5801 
5802 	if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
5803 		expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
5804 	else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
5805 	         input_type.vecsize == 2)
5806 		expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
5807 	else
5808 		return false;
5809 
5810 	emit_op(result_type, id, expr, should_forward(op0));
5811 	return true;
5812 }
5813 
5814 void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5815                                        const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
5816 {
5817 	string cast_op0, cast_op1;
5818 	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
5819 	auto &out_type = get<SPIRType>(result_type);
5820 
5821 	// We might have casted away from the result type, so bitcast again.
5822 	// For example, arithmetic right shift with uint inputs.
5823 	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
5824 	string expr;
5825 	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
5826 	{
5827 		expected_type.basetype = input_type;
5828 		expr = bitcast_glsl_op(out_type, expected_type);
5829 		expr += '(';
5830 		expr += join(cast_op0, " ", op, " ", cast_op1);
5831 		expr += ')';
5832 	}
5833 	else
5834 		expr += join(cast_op0, " ", op, " ", cast_op1);
5835 
5836 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5837 	inherit_expression_dependencies(result_id, op0);
5838 	inherit_expression_dependencies(result_id, op1);
5839 }
5840 
5841 void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5842 {
5843 	bool forward = should_forward(op0);
5844 	emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
5845 	inherit_expression_dependencies(result_id, op0);
5846 }
5847 
5848 void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5849                                        const char *op)
5850 {
5851 	bool forward = should_forward(op0) && should_forward(op1);
5852 	emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
5853 	        forward);
5854 	inherit_expression_dependencies(result_id, op0);
5855 	inherit_expression_dependencies(result_id, op1);
5856 }
5857 
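// Atomic results are always forced into a temporary (never forwarded), and all atomic-capable variables are
// flushed afterwards, since the atomic may have modified them.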
5858 void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5859                                        const char *op)
5860 {
5861 	forced_temporaries.insert(result_id);
5862 	emit_op(result_type, result_id,
5863 	        join(op, "(", to_non_uniform_aware_expression(op0), ", ",
5864 	             to_unpacked_expression(op1), ")"), false);
5865 	flush_all_atomic_capable_variables();
5866 }
5867 
5868 void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
5869                                        uint32_t op0, uint32_t op1, uint32_t op2,
5870                                        const char *op)
5871 {
5872 	forced_temporaries.insert(result_id);
5873 	emit_op(result_type, result_id,
5874 	        join(op, "(", to_non_uniform_aware_expression(op0), ", ",
5875 	             to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false);
5876 	flush_all_atomic_capable_variables();
5877 }
5878 
5879 void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
5880                                            SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
5881 {
5882 	auto &out_type = get<SPIRType>(result_type);
5883 	auto &expr_type = expression_type(op0);
5884 	auto expected_type = out_type;
5885 
5886 	// Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
5887 	expected_type.basetype = input_type;
5888 	expected_type.width = expr_type.width;
5889 
5890 	string cast_op;
5891 	if (expr_type.basetype != input_type)
5892 	{
5893 		if (expr_type.basetype == SPIRType::Boolean)
5894 			cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")");
5895 		else
5896 			cast_op = bitcast_glsl(expected_type, op0);
5897 	}
5898 	else
5899 		cast_op = to_unpacked_expression(op0);
5900 
5901 	string expr;
5902 	if (out_type.basetype != expected_result_type)
5903 	{
5904 		expected_type.basetype = expected_result_type;
5905 		expected_type.width = out_type.width;
5906 		if (out_type.basetype == SPIRType::Boolean)
5907 			expr = type_to_glsl(out_type);
5908 		else
5909 			expr = bitcast_glsl_op(out_type, expected_type);
5910 		expr += '(';
5911 		expr += join(op, "(", cast_op, ")");
5912 		expr += ')';
5913 	}
5914 	else
5915 	{
5916 		expr += join(op, "(", cast_op, ")");
5917 	}
5918 
5919 	emit_op(result_type, result_id, expr, should_forward(op0));
5920 	inherit_expression_dependencies(result_id, op0);
5921 }
5922 
5923 // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
5924 // and different vector sizes all at once. Need a special purpose method here.
5925 void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5926                                                    uint32_t op2, const char *op,
5927                                                    SPIRType::BaseType expected_result_type,
5928                                                    SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
5929                                                    SPIRType::BaseType input_type2)
5930 {
5931 	auto &out_type = get<SPIRType>(result_type);
5932 	auto expected_type = out_type;
5933 	expected_type.basetype = input_type0;
5934 
5935 	string cast_op0 =
5936 	    expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5937 
5938 	auto op1_expr = to_unpacked_expression(op1);
5939 	auto op2_expr = to_unpacked_expression(op2);
5940 
5941 	// Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
5942 	expected_type.basetype = input_type1;
5943 	expected_type.vecsize = 1;
5944 	string cast_op1 = expression_type(op1).basetype != input_type1 ?
5945 	                      join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
5946 	                      op1_expr;
5947 
5948 	expected_type.basetype = input_type2;
5949 	expected_type.vecsize = 1;
5950 	string cast_op2 = expression_type(op2).basetype != input_type2 ?
5951 	                      join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
5952 	                      op2_expr;
5953 
5954 	string expr;
5955 	if (out_type.basetype != expected_result_type)
5956 	{
5957 		expected_type.vecsize = out_type.vecsize;
5958 		expected_type.basetype = expected_result_type;
5959 		expr = bitcast_glsl_op(out_type, expected_type);
5960 		expr += '(';
5961 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5962 		expr += ')';
5963 	}
5964 	else
5965 	{
5966 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5967 	}
5968 
5969 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
5970 	inherit_expression_dependencies(result_id, op0);
5971 	inherit_expression_dependencies(result_id, op1);
5972 	inherit_expression_dependencies(result_id, op2);
5973 }
5974 
5975 void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5976                                              uint32_t op2, const char *op, SPIRType::BaseType input_type)
5977 {
5978 	auto &out_type = get<SPIRType>(result_type);
5979 	auto expected_type = out_type;
5980 	expected_type.basetype = input_type;
5981 	string cast_op0 =
5982 	    expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5983 	string cast_op1 =
5984 	    expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
5985 	string cast_op2 =
5986 	    expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
5987 
5988 	string expr;
5989 	if (out_type.basetype != input_type)
5990 	{
5991 		expr = bitcast_glsl_op(out_type, expected_type);
5992 		expr += '(';
5993 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5994 		expr += ')';
5995 	}
5996 	else
5997 	{
5998 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5999 	}
6000 
6001 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
6002 	inherit_expression_dependencies(result_id, op0);
6003 	inherit_expression_dependencies(result_id, op1);
6004 	inherit_expression_dependencies(result_id, op2);
6005 }
6006 
6007 void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
6008                                                       uint32_t op1, const char *op, SPIRType::BaseType input_type)
6009 {
6010 	// Special purpose method for implementing clustered subgroup opcodes.
6011 	// The main difference is that op1 does not participate in any casting; it needs to be a literal.
6012 	auto &out_type = get<SPIRType>(result_type);
6013 	auto expected_type = out_type;
6014 	expected_type.basetype = input_type;
6015 	string cast_op0 =
6016 	    expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
6017 
6018 	string expr;
6019 	if (out_type.basetype != input_type)
6020 	{
6021 		expr = bitcast_glsl_op(out_type, expected_type);
6022 		expr += '(';
6023 		expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
6024 		expr += ')';
6025 	}
6026 	else
6027 	{
6028 		expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
6029 	}
6030 
6031 	emit_op(result_type, result_id, expr, should_forward(op0));
6032 	inherit_expression_dependencies(result_id, op0);
6033 }
6034 
6035 void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6036                                             const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
6037 {
6038 	string cast_op0, cast_op1;
6039 	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
6040 	auto &out_type = get<SPIRType>(result_type);
6041 
6042 	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
6043 	string expr;
6044 	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
6045 	{
6046 		expected_type.basetype = input_type;
6047 		expr = bitcast_glsl_op(out_type, expected_type);
6048 		expr += '(';
6049 		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
6050 		expr += ')';
6051 	}
6052 	else
6053 	{
6054 		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
6055 	}
6056 
6057 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
6058 	inherit_expression_dependencies(result_id, op0);
6059 	inherit_expression_dependencies(result_id, op1);
6060 }
6061 
6062 void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6063                                         uint32_t op2, const char *op)
6064 {
6065 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
6066 	emit_op(result_type, result_id,
6067 	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
6068 	             to_unpacked_expression(op2), ")"),
6069 	        forward);
6070 
6071 	inherit_expression_dependencies(result_id, op0);
6072 	inherit_expression_dependencies(result_id, op1);
6073 	inherit_expression_dependencies(result_id, op2);
6074 }
6075 
6076 void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6077                                            uint32_t op2, uint32_t op3, const char *op)
6078 {
6079 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
6080 	emit_op(result_type, result_id,
6081 	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
6082 	             to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
6083 	        forward);
6084 
6085 	inherit_expression_dependencies(result_id, op0);
6086 	inherit_expression_dependencies(result_id, op1);
6087 	inherit_expression_dependencies(result_id, op2);
6088 	inherit_expression_dependencies(result_id, op3);
6089 }
6090 
6091 void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6092                                            uint32_t op2, uint32_t op3, const char *op,
6093                                            SPIRType::BaseType offset_count_type)
6094 {
6095 	// Only need to cast the offset/count arguments. Types of base/insert must be the same as the result type,
6096 	// and bitfieldInsert is sign invariant.
6097 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
6098 
6099 	auto op0_expr = to_unpacked_expression(op0);
6100 	auto op1_expr = to_unpacked_expression(op1);
6101 	auto op2_expr = to_unpacked_expression(op2);
6102 	auto op3_expr = to_unpacked_expression(op3);
6103 
6104 	SPIRType target_type;
6105 	target_type.vecsize = 1;
6106 	target_type.basetype = offset_count_type;
6107 
6108 	if (expression_type(op2).basetype != offset_count_type)
6109 	{
6110 		// Value-cast here. Input might be 16-bit. GLSL requires int.
6111 		op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
6112 	}
6113 
6114 	if (expression_type(op3).basetype != offset_count_type)
6115 	{
6116 		// Value-cast here. Input might be 16-bit. GLSL requires int.
6117 		op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
6118 	}
6119 
6120 	emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
6121 	        forward);
6122 
6123 	inherit_expression_dependencies(result_id, op0);
6124 	inherit_expression_dependencies(result_id, op1);
6125 	inherit_expression_dependencies(result_id, op2);
6126 	inherit_expression_dependencies(result_id, op3);
6127 }
6128 
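// Maps a modern texture function name (texture, textureLod, texelFetch, ...) to its legacy GLSL/ESSL counterpart
// (texture2D, shadow2DEXT, texture2DLodEXT, ...), enabling any extensions the legacy form requires.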
6129 string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
6130 {
6131 	const char *type;
6132 	switch (imgtype.image.dim)
6133 	{
6134 	case spv::Dim1D:
6135 		type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
6136 		break;
6137 	case spv::Dim2D:
6138 		type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
6139 		break;
6140 	case spv::Dim3D:
6141 		type = "3D";
6142 		break;
6143 	case spv::DimCube:
6144 		type = "Cube";
6145 		break;
6146 	case spv::DimRect:
6147 		type = "2DRect";
6148 		break;
6149 	case spv::DimBuffer:
6150 		type = "Buffer";
6151 		break;
6152 	case spv::DimSubpassData:
6153 		type = "2D";
6154 		break;
6155 	default:
6156 		type = "";
6157 		break;
6158 	}
6159 
6160 	// In legacy GLSL, an extension is required for textureLod in the fragment
6161 	// shader or textureGrad anywhere.
6162 	bool legacy_lod_ext = false;
6163 	auto &execution = get_entry_point();
6164 	if (op == "textureGrad" || op == "textureProjGrad" ||
6165 	    ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
6166 	{
6167 		if (is_legacy_es())
6168 		{
6169 			legacy_lod_ext = true;
6170 			require_extension_internal("GL_EXT_shader_texture_lod");
6171 		}
6172 		else if (is_legacy_desktop())
6173 			require_extension_internal("GL_ARB_shader_texture_lod");
6174 	}
6175 
6176 	if (op == "textureLodOffset" || op == "textureProjLodOffset")
6177 	{
6178 		if (is_legacy_es())
6179 			SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
6180 
6181 		require_extension_internal("GL_EXT_gpu_shader4");
6182 	}
6183 
6184 	// GLES has very limited support for shadow samplers.
6185 	// Basically, shadow2D and shadow2DProj work through EXT_shadow_samplers;
6186 	// everything else just throws.
6187 	bool is_comparison = image_is_comparison(imgtype, tex);
6188 	if (is_comparison && is_legacy_es())
6189 	{
6190 		if (op == "texture" || op == "textureProj")
6191 			require_extension_internal("GL_EXT_shadow_samplers");
6192 		else
6193 			SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
6194 	}
6195 
6196 	if (op == "textureSize")
6197 	{
6198 		if (is_legacy_es())
6199 			SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
6200 		if (is_comparison)
6201 			SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
6202 		require_extension_internal("GL_EXT_gpu_shader4");
6203 	}
6204 
6205 	if (op == "texelFetch" && is_legacy_es())
6206 		SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
6207 
6208 	bool is_es_and_depth = is_legacy_es() && is_comparison;
6209 	std::string type_prefix = is_comparison ? "shadow" : "texture";
6210 
6211 	if (op == "texture")
6212 		return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
6213 	else if (op == "textureLod")
6214 		return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
6215 	else if (op == "textureProj")
6216 		return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
6217 	else if (op == "textureGrad")
6218 		return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
6219 	else if (op == "textureProjLod")
6220 		return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
6221 	else if (op == "textureLodOffset")
6222 		return join(type_prefix, type, "LodOffset");
6223 	else if (op == "textureProjGrad")
6224 		return join(type_prefix, type,
6225 		            is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
6226 	else if (op == "textureProjLodOffset")
6227 		return join(type_prefix, type, "ProjLodOffset");
6228 	else if (op == "textureSize")
6229 		return join("textureSize", type);
6230 	else if (op == "texelFetch")
6231 		return join("texelFetch", type);
6232 	else
6233 	{
6234 		SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
6235 	}
6236 }
6237 
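// Detects an OpSelect between constant 0 and 1 which can be lowered to a plain constructor cast of the
// boolean selector; on success, the constructor name (e.g., int, uvec2) is returned through 'op'.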
6238 bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
6239 {
6240 	auto *cleft = maybe_get<SPIRConstant>(left);
6241 	auto *cright = maybe_get<SPIRConstant>(right);
6242 	auto &lerptype = expression_type(lerp);
6243 
6244 	// If our targets aren't constants, we cannot use construction.
6245 	if (!cleft || !cright)
6246 		return false;
6247 
6248 	// If our targets are spec constants, we cannot use construction.
6249 	if (cleft->specialization || cright->specialization)
6250 		return false;
6251 
6252 	auto &value_type = get<SPIRType>(cleft->constant_type);
6253 
6254 	if (lerptype.basetype != SPIRType::Boolean)
6255 		return false;
6256 	if (value_type.basetype == SPIRType::Struct || is_array(value_type))
6257 		return false;
6258 	if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
6259 		return false;
6260 
6261 	// If our bool selects between 0 and 1, we can cast from bool instead, making for a trivial constructor cast.
6262 	bool ret = true;
6263 	for (uint32_t col = 0; col < value_type.columns; col++)
6264 	{
6265 		for (uint32_t row = 0; row < value_type.vecsize; row++)
6266 		{
6267 			switch (type.basetype)
6268 			{
6269 			case SPIRType::Short:
6270 			case SPIRType::UShort:
6271 				ret = cleft->scalar_u16(col, row) == 0 && cright->scalar_u16(col, row) == 1;
6272 				break;
6273 
6274 			case SPIRType::Int:
6275 			case SPIRType::UInt:
6276 				ret = cleft->scalar(col, row) == 0 && cright->scalar(col, row) == 1;
6277 				break;
6278 
6279 			case SPIRType::Half:
6280 				ret = cleft->scalar_f16(col, row) == 0.0f && cright->scalar_f16(col, row) == 1.0f;
6281 				break;
6282 
6283 			case SPIRType::Float:
6284 				ret = cleft->scalar_f32(col, row) == 0.0f && cright->scalar_f32(col, row) == 1.0f;
6285 				break;
6286 
6287 			case SPIRType::Double:
6288 				ret = cleft->scalar_f64(col, row) == 0.0 && cright->scalar_f64(col, row) == 1.0;
6289 				break;
6290 
6291 			case SPIRType::Int64:
6292 			case SPIRType::UInt64:
6293 				ret = cleft->scalar_u64(col, row) == 0 && cright->scalar_u64(col, row) == 1;
6294 				break;
6295 
6296 			default:
6297 				return false;
6298 			}
6299 		}
6300 
6301 		if (!ret)
6302 			break;
6303 	}
6304 
6305 	if (ret)
6306 		op = type_to_glsl_constructor(type);
6307 	return ret;
6308 }
6309 
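// Emits a select as ternary expressions. A scalar selector becomes a single ?:, while a vector selector is
// expanded component by component, roughly vec2(s.x ? a.x : b.x, s.y ? a.y : b.y).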
6310 string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
6311                                            uint32_t false_value)
6312 {
6313 	string expr;
6314 	auto &lerptype = expression_type(select);
6315 
6316 	if (lerptype.vecsize == 1)
6317 		expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
6318 		            to_enclosed_pointer_expression(false_value));
6319 	else
6320 	{
6321 		auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
6322 
6323 		expr = type_to_glsl_constructor(restype);
6324 		expr += "(";
6325 		for (uint32_t i = 0; i < restype.vecsize; i++)
6326 		{
6327 			expr += swiz(select, i);
6328 			expr += " ? ";
6329 			expr += swiz(true_value, i);
6330 			expr += " : ";
6331 			expr += swiz(false_value, i);
6332 			if (i + 1 < restype.vecsize)
6333 				expr += ", ";
6334 		}
6335 		expr += ")";
6336 	}
6337 
6338 	return expr;
6339 }
6340 
6341 void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
6342 {
6343 	auto &lerptype = expression_type(lerp);
6344 	auto &restype = get<SPIRType>(result_type);
6345 
6346 	// If this results in a variable pointer, assume it may be written through.
6347 	if (restype.pointer)
6348 	{
6349 		register_write(left);
6350 		register_write(right);
6351 	}
6352 
6353 	string mix_op;
6354 	bool has_boolean_mix = *backend.boolean_mix_function &&
6355 	                       ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
6356 	bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
6357 
6358 	// Cannot use boolean mix when the lerp argument is just one boolean;
6359 	// fall back to regular ternary expressions.
6360 	if (lerptype.vecsize == 1)
6361 		has_boolean_mix = false;
6362 
6363 	// If we can reduce the mix to a simple cast, do so.
6364 	// This helps for cases like int(bool), uint(bool) which is implemented with
6365 	// OpSelect bool 1 0.
6366 	if (trivial_mix)
6367 	{
6368 		emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
6369 	}
6370 	else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
6371 	{
6372 		// Boolean mix not supported on desktop without extension.
6373 		// Was added in OpenGL 4.5 with ES 3.1 compat.
6374 		//
6375 		// Could use GL_EXT_shader_integer_mix on desktop at least,
6376 		// but Apple doesn't support it. :(
6377 		// Just implement it as ternary expressions.
6378 		auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
6379 		emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
6380 		inherit_expression_dependencies(id, left);
6381 		inherit_expression_dependencies(id, right);
6382 		inherit_expression_dependencies(id, lerp);
6383 	}
6384 	else if (lerptype.basetype == SPIRType::Boolean)
6385 		emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
6386 	else
6387 		emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
6388 }
6389 
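// Resolves a (texture, sampler) pair to the remapped combined sampler expression, either through the current
// function's combined parameters or through the global combined_image_samplers mapping.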
6390 string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
6391 {
6392 	// Keep track of the array indices we have used to load the image.
6393 	// We'll need to use the same array index into the combined image sampler array.
6394 	auto image_expr = to_non_uniform_aware_expression(image_id);
6395 	string array_expr;
6396 	auto array_index = image_expr.find_first_of('[');
6397 	if (array_index != string::npos)
6398 		array_expr = image_expr.substr(array_index, string::npos);
6399 
6400 	auto &args = current_function->arguments;
6401 
6402 	// For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler)
6403 	// and redirect them into new sampler2D uniforms.
6404 	auto *image = maybe_get_backing_variable(image_id);
6405 	auto *samp = maybe_get_backing_variable(samp_id);
6406 	if (image)
6407 		image_id = image->self;
6408 	if (samp)
6409 		samp_id = samp->self;
6410 
6411 	auto image_itr = find_if(begin(args), end(args),
6412 	                         [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
6413 
6414 	auto sampler_itr = find_if(begin(args), end(args),
6415 	                           [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
6416 
6417 	if (image_itr != end(args) || sampler_itr != end(args))
6418 	{
6419 		// If either the image or the sampler originates from a function parameter, we will find it in our argument list.
6420 		bool global_image = image_itr == end(args);
6421 		bool global_sampler = sampler_itr == end(args);
6422 		VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
6423 		VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
6424 
6425 		auto &combined = current_function->combined_parameters;
6426 		auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
6427 			return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
6428 			       p.sampler_id == sid;
6429 		});
6430 
6431 		if (itr != end(combined))
6432 			return to_expression(itr->id) + array_expr;
6433 		else
6434 		{
6435 			SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
6436 			                  "build_combined_image_samplers() used "
6437 			                  "before compile() was called?");
6438 		}
6439 	}
6440 	else
6441 	{
6442 		// For global sampler2D, look directly at the global remapping table.
6443 		auto &mapping = combined_image_samplers;
6444 		auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
6445 			return combined.image_id == image_id && combined.sampler_id == samp_id;
6446 		});
6447 
6448 		if (itr != end(combined_image_samplers))
6449 			return to_expression(itr->combined_id) + array_expr;
6450 		else
6451 		{
6452 			SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
6453 			                  "before compile() was called?");
6454 		}
6455 	}
6456 }
6457 
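// Subgroup operations the OpenGL (non-Vulkan) path knows how to translate.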
6458 bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
6459 {
6460 	switch (op)
6461 	{
6462 	case OpGroupNonUniformElect:
6463 	case OpGroupNonUniformBallot:
6464 	case OpGroupNonUniformBallotFindLSB:
6465 	case OpGroupNonUniformBallotFindMSB:
6466 	case OpGroupNonUniformBroadcast:
6467 	case OpGroupNonUniformBroadcastFirst:
6468 	case OpGroupNonUniformAll:
6469 	case OpGroupNonUniformAny:
6470 	case OpGroupNonUniformAllEqual:
6471 	case OpControlBarrier:
6472 	case OpMemoryBarrier:
6473 	case OpGroupNonUniformBallotBitCount:
6474 	case OpGroupNonUniformBallotBitExtract:
6475 	case OpGroupNonUniformInverseBallot:
6476 		return true;
6477 	default:
6478 		return false;
6479 	}
6480 }
6481 
6482 void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
6483 {
6484 	if (options.vulkan_semantics && combined_image_samplers.empty())
6485 	{
6486 		emit_binary_func_op(result_type, result_id, image_id, samp_id,
6487 		                    type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
6488 	}
6489 	else
6490 	{
6491 		// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
6492 		emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
6493 	}
6494 
6495 	// Make sure to suppress usage tracking and any expression invalidation.
6496 	// It is illegal to create temporaries of opaque types.
6497 	forwarded_temporaries.erase(result_id);
6498 }
6499 
6500 static inline bool image_opcode_is_sample_no_dref(Op op)
6501 {
6502 	switch (op)
6503 	{
6504 	case OpImageSampleExplicitLod:
6505 	case OpImageSampleImplicitLod:
6506 	case OpImageSampleProjExplicitLod:
6507 	case OpImageSampleProjImplicitLod:
6508 	case OpImageFetch:
6509 	case OpImageRead:
6510 	case OpImageSparseSampleExplicitLod:
6511 	case OpImageSparseSampleImplicitLod:
6512 	case OpImageSparseSampleProjExplicitLod:
6513 	case OpImageSparseSampleProjImplicitLod:
6514 	case OpImageSparseFetch:
6515 	case OpImageSparseRead:
6516 		return true;
6517 
6518 	default:
6519 		return false;
6520 	}
6521 }
6522 
6523 void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
6524                                                     uint32_t &texel_id)
6525 {
6526 	// Need to allocate two temporaries.
6527 	if (options.es)
6528 		SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
6529 	require_extension_internal("GL_ARB_sparse_texture2");
6530 
6531 	auto &temps = extra_sub_expressions[id];
6532 	if (temps == 0)
6533 		temps = ir.increase_bound_by(2);
6534 
6535 	feedback_id = temps + 0;
6536 	texel_id = temps + 1;
6537 
6538 	auto &return_type = get<SPIRType>(result_type_id);
6539 	if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
6540 		SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
6541 	emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
6542 	emit_uninitialized_temporary(return_type.member_types[1], texel_id);
6543 }
6544 
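// The texel temporary is allocated immediately after the residency code temporary, hence the +1 below.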
6545 uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
6546 {
6547 	auto itr = extra_sub_expressions.find(id);
6548 	if (itr == extra_sub_expressions.end())
6549 		return 0;
6550 	else
6551 		return itr->second + 1;
6552 }
6553 
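// Emits an image sampling/fetching opcode. For sparse variants, the residency code and texel are written to
// separate temporaries first and then repacked into the two-member result struct expected by SPIR-V.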
6554 void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
6555 {
6556 	auto *ops = stream(i);
6557 	auto op = static_cast<Op>(i.op);
6558 
6559 	SmallVector<uint32_t> inherited_expressions;
6560 
6561 	uint32_t result_type_id = ops[0];
6562 	uint32_t id = ops[1];
6563 	auto &return_type = get<SPIRType>(result_type_id);
6564 
6565 	uint32_t sparse_code_id = 0;
6566 	uint32_t sparse_texel_id = 0;
6567 	if (sparse)
6568 		emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);
6569 
6570 	bool forward = false;
6571 	string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
6572 
6573 	if (sparse)
6574 	{
6575 		statement(to_expression(sparse_code_id), " = ", expr, ";");
6576 		expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
6577 		            ")");
6578 		forward = true;
6579 		inherited_expressions.clear();
6580 	}
6581 
6582 	emit_op(result_type_id, id, expr, forward);
6583 	for (auto &inherit : inherited_expressions)
6584 		inherit_expression_dependencies(id, inherit);
6585 
6586 	// Do not register sparse ops as control dependent as they are always lowered to a temporary.
6587 	switch (op)
6588 	{
6589 	case OpImageSampleDrefImplicitLod:
6590 	case OpImageSampleImplicitLod:
6591 	case OpImageSampleProjImplicitLod:
6592 	case OpImageSampleProjDrefImplicitLod:
6593 		register_control_dependent_expression(id);
6594 		break;
6595 
6596 	default:
6597 		break;
6598 	}
6599 }
6600 
6601 std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
6602                                         SmallVector<uint32_t> &inherited_expressions)
6603 {
6604 	auto *ops = stream(i);
6605 	auto op = static_cast<Op>(i.op);
6606 	uint32_t length = i.length;
6607 
6608 	uint32_t result_type_id = ops[0];
6609 	VariableID img = ops[2];
6610 	uint32_t coord = ops[3];
6611 	uint32_t dref = 0;
6612 	uint32_t comp = 0;
6613 	bool gather = false;
6614 	bool proj = false;
6615 	bool fetch = false;
6616 	bool nonuniform_expression = false;
6617 	const uint32_t *opt = nullptr;
6618 
6619 	auto &result_type = get<SPIRType>(result_type_id);
6620 
6621 	inherited_expressions.push_back(coord);
6622 	if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img))
6623 		nonuniform_expression = true;
6624 
6625 	switch (op)
6626 	{
6627 	case OpImageSampleDrefImplicitLod:
6628 	case OpImageSampleDrefExplicitLod:
6629 	case OpImageSparseSampleDrefImplicitLod:
6630 	case OpImageSparseSampleDrefExplicitLod:
6631 		dref = ops[4];
6632 		opt = &ops[5];
6633 		length -= 5;
6634 		break;
6635 
6636 	case OpImageSampleProjDrefImplicitLod:
6637 	case OpImageSampleProjDrefExplicitLod:
6638 	case OpImageSparseSampleProjDrefImplicitLod:
6639 	case OpImageSparseSampleProjDrefExplicitLod:
6640 		dref = ops[4];
6641 		opt = &ops[5];
6642 		length -= 5;
6643 		proj = true;
6644 		break;
6645 
6646 	case OpImageDrefGather:
6647 	case OpImageSparseDrefGather:
6648 		dref = ops[4];
6649 		opt = &ops[5];
6650 		length -= 5;
6651 		gather = true;
6652 		if (options.es && options.version < 310)
6653 			SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6654 		else if (!options.es && options.version < 400)
6655 			SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
6656 		break;
6657 
6658 	case OpImageGather:
6659 	case OpImageSparseGather:
6660 		comp = ops[4];
6661 		opt = &ops[5];
6662 		length -= 5;
6663 		gather = true;
6664 		if (options.es && options.version < 310)
6665 			SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6666 		else if (!options.es && options.version < 400)
6667 		{
6668 			if (!expression_is_constant_null(comp))
6669 				SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
6670 			require_extension_internal("GL_ARB_texture_gather");
6671 		}
6672 		break;
6673 
6674 	case OpImageFetch:
6675 	case OpImageSparseFetch:
6676 	case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
6677 		opt = &ops[4];
6678 		length -= 4;
6679 		fetch = true;
6680 		break;
6681 
6682 	case OpImageSampleProjImplicitLod:
6683 	case OpImageSampleProjExplicitLod:
6684 	case OpImageSparseSampleProjImplicitLod:
6685 	case OpImageSparseSampleProjExplicitLod:
6686 		opt = &ops[4];
6687 		length -= 4;
6688 		proj = true;
6689 		break;
6690 
6691 	default:
6692 		opt = &ops[4];
6693 		length -= 4;
6694 		break;
6695 	}
6696 
6697 	// Bypass pointers because we need the real image struct
6698 	auto &type = expression_type(img);
6699 	auto &imgtype = get<SPIRType>(type.self);
6700 
6701 	uint32_t coord_components = 0;
6702 	switch (imgtype.image.dim)
6703 	{
6704 	case spv::Dim1D:
6705 		coord_components = 1;
6706 		break;
6707 	case spv::Dim2D:
6708 		coord_components = 2;
6709 		break;
6710 	case spv::Dim3D:
6711 		coord_components = 3;
6712 		break;
6713 	case spv::DimCube:
6714 		coord_components = 3;
6715 		break;
6716 	case spv::DimBuffer:
6717 		coord_components = 1;
6718 		break;
6719 	default:
6720 		coord_components = 2;
6721 		break;
6722 	}
6723 
6724 	if (dref)
6725 		inherited_expressions.push_back(dref);
6726 
6727 	if (proj)
6728 		coord_components++;
6729 	if (imgtype.image.arrayed)
6730 		coord_components++;
6731 
6732 	uint32_t bias = 0;
6733 	uint32_t lod = 0;
6734 	uint32_t grad_x = 0;
6735 	uint32_t grad_y = 0;
6736 	uint32_t coffset = 0;
6737 	uint32_t offset = 0;
6738 	uint32_t coffsets = 0;
6739 	uint32_t sample = 0;
6740 	uint32_t minlod = 0;
6741 	uint32_t flags = 0;
6742 
6743 	if (length)
6744 	{
6745 		flags = *opt++;
6746 		length--;
6747 	}
6748 
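	// Consume the optional image operands in the order the ImageOperands mask defines them.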
6749 	auto test = [&](uint32_t &v, uint32_t flag) {
6750 		if (length && (flags & flag))
6751 		{
6752 			v = *opt++;
6753 			inherited_expressions.push_back(v);
6754 			length--;
6755 		}
6756 	};
6757 
6758 	test(bias, ImageOperandsBiasMask);
6759 	test(lod, ImageOperandsLodMask);
6760 	test(grad_x, ImageOperandsGradMask);
6761 	test(grad_y, ImageOperandsGradMask);
6762 	test(coffset, ImageOperandsConstOffsetMask);
6763 	test(offset, ImageOperandsOffsetMask);
6764 	test(coffsets, ImageOperandsConstOffsetsMask);
6765 	test(sample, ImageOperandsSampleMask);
6766 	test(minlod, ImageOperandsMinLodMask);
6767 
6768 	TextureFunctionBaseArguments base_args = {};
6769 	base_args.img = img;
6770 	base_args.imgtype = &imgtype;
6771 	base_args.is_fetch = fetch != 0;
6772 	base_args.is_gather = gather != 0;
6773 	base_args.is_proj = proj != 0;
6774 
6775 	string expr;
6776 	TextureFunctionNameArguments name_args = {};
6777 
6778 	name_args.base = base_args;
6779 	name_args.has_array_offsets = coffsets != 0;
6780 	name_args.has_offset = coffset != 0 || offset != 0;
6781 	name_args.has_grad = grad_x != 0 || grad_y != 0;
6782 	name_args.has_dref = dref != 0;
6783 	name_args.is_sparse_feedback = sparse;
6784 	name_args.has_min_lod = minlod != 0;
6785 	name_args.lod = lod;
6786 	expr += to_function_name(name_args);
6787 	expr += "(";
6788 
6789 	uint32_t sparse_texel_id = 0;
6790 	if (sparse)
6791 		sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
6792 
6793 	TextureFunctionArguments args = {};
6794 	args.base = base_args;
6795 	args.coord = coord;
6796 	args.coord_components = coord_components;
6797 	args.dref = dref;
6798 	args.grad_x = grad_x;
6799 	args.grad_y = grad_y;
6800 	args.lod = lod;
6801 	args.coffset = coffset;
6802 	args.offset = offset;
6803 	args.bias = bias;
6804 	args.component = comp;
6805 	args.sample = sample;
6806 	args.sparse_texel = sparse_texel_id;
6807 	args.min_lod = minlod;
6808 	args.nonuniform_expression = nonuniform_expression;
6809 	expr += to_function_args(args, forward);
6810 	expr += ")";
6811 
6812 	// texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
6813 	if (is_legacy() && image_is_comparison(imgtype, img))
6814 		expr += ".r";
6815 
6816 	// Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
6817 	// Remap back to 4 components as sampling opcodes expect.
6818 	if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
6819 	{
6820 		bool image_is_depth = false;
6821 		const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
6822 		VariableID image_id = combined ? combined->image : img;
6823 
6824 		if (combined && image_is_comparison(imgtype, combined->image))
6825 			image_is_depth = true;
6826 		else if (image_is_comparison(imgtype, img))
6827 			image_is_depth = true;
6828 
6829 		// We must also check the backing variable for the image.
6830 		// We might have loaded an OpImage, and used that handle for two different purposes.
6831 		// Once with comparison, once without.
6832 		auto *image_variable = maybe_get_backing_variable(image_id);
6833 		if (image_variable && image_is_comparison(get<SPIRType>(image_variable->basetype), image_variable->self))
6834 			image_is_depth = true;
6835 
6836 		if (image_is_depth)
6837 			expr = remap_swizzle(result_type, 1, expr);
6838 	}
6839 
6840 	if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
6841 	{
6842 		// Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
6843 		// Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
6844 		expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
6845 	}
6846 
6847 	// Deals with reads from MSL. We might need to downconvert to fewer components.
6848 	if (op == OpImageRead)
6849 		expr = remap_swizzle(result_type, 4, expr);
6850 
6851 	return expr;
6852 }
6853 
6854 bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
6855 {
6856 	auto *c = maybe_get<SPIRConstant>(id);
6857 	if (!c)
6858 		return false;
6859 	return c->constant_is_null();
6860 }
6861 
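// Returns true if the array expression cannot be treated as a plain value type, e.g. arrays living inside
// buffer blocks on backends where such arrays cannot be copied around by value.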
6862 bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
6863 {
6864 	auto &type = expression_type(ptr);
6865 	if (type.array.empty())
6866 		return false;
6867 
6868 	if (!backend.array_is_value_type)
6869 		return true;
6870 
6871 	auto *var = maybe_get_backing_variable(ptr);
6872 	if (!var)
6873 		return false;
6874 
6875 	auto &backed_type = get<SPIRType>(var->basetype);
6876 	return !backend.buffer_offset_array_is_value_type && backed_type.basetype == SPIRType::Struct &&
6877 	       has_member_decoration(backed_type.self, 0, DecorationOffset);
6878 }
6879 
6880 // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
6881 // For some subclasses, the function is a method on the specified image.
6882 string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
6883 {
6884 	if (args.has_min_lod)
6885 	{
6886 		if (options.es)
6887 			SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
6888 		require_extension_internal("GL_ARB_sparse_texture_clamp");
6889 	}
6890 
6891 	string fname;
6892 	auto &imgtype = *args.base.imgtype;
6893 	VariableID tex = args.base.img;
6894 
6895 	// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
6896 	// To emulate this, we will have to use textureGrad with a constant gradient of 0.
6897 	// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
6898 	// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
6899 	bool workaround_lod_array_shadow_as_grad = false;
6900 	if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
6901 	    image_is_comparison(imgtype, tex) && args.lod)
6902 	{
6903 		if (!expression_is_constant_null(args.lod))
6904 		{
6905 			SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
6906 			                  "expressed in GLSL.");
6907 		}
6908 		workaround_lod_array_shadow_as_grad = true;
6909 	}
6910 
6911 	if (args.is_sparse_feedback)
6912 		fname += "sparse";
6913 
6914 	if (args.base.is_fetch)
6915 		fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
6916 	else
6917 	{
6918 		fname += args.is_sparse_feedback ? "Texture" : "texture";
6919 
6920 		if (args.base.is_gather)
6921 			fname += "Gather";
6922 		if (args.has_array_offsets)
6923 			fname += "Offsets";
6924 		if (args.base.is_proj)
6925 			fname += "Proj";
6926 		if (args.has_grad || workaround_lod_array_shadow_as_grad)
6927 			fname += "Grad";
6928 		if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
6929 			fname += "Lod";
6930 	}
6931 
6932 	if (args.has_offset)
6933 		fname += "Offset";
6934 
6935 	if (args.has_min_lod)
6936 		fname += "Clamp";
6937 
6938 	if (args.is_sparse_feedback || args.has_min_lod)
6939 		fname += "ARB";
6940 
6941 	return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
6942 }
6943 
6944 std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
6945 {
6946 	auto *var = maybe_get_backing_variable(id);
6947 
6948 	// If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
6949 	// In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
6950 	if (var)
6951 	{
6952 		auto &type = get<SPIRType>(var->basetype);
6953 		if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
6954 		{
6955 			if (options.vulkan_semantics)
6956 			{
6957 				if (dummy_sampler_id)
6958 				{
6959 					// Don't need to consider Shadow state since the dummy sampler is always non-shadow.
6960 					auto sampled_type = type;
6961 					sampled_type.basetype = SPIRType::SampledImage;
6962 					return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ",
6963 					            to_expression(dummy_sampler_id), ")");
6964 				}
6965 				else
6966 				{
6967 					// Newer glslang supports this extension to deal with texture2D as argument to texture functions.
6968 					require_extension_internal("GL_EXT_samplerless_texture_functions");
6969 				}
6970 			}
6971 			else
6972 			{
6973 				if (!dummy_sampler_id)
6974 					SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
6975 					                  "build_dummy_sampler_for_combined_images() called?");
6976 
6977 				return to_combined_image_sampler(id, dummy_sampler_id);
6978 			}
6979 		}
6980 	}
6981 
6982 	return to_non_uniform_aware_expression(id);
6983 }
6984 
6985 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
6986 string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
6987 {
6988 	VariableID img = args.base.img;
6989 	auto &imgtype = *args.base.imgtype;
6990 
6991 	string farg_str;
6992 	if (args.base.is_fetch)
6993 		farg_str = convert_separate_image_to_expression(img);
6994 	else
6995 		farg_str = to_non_uniform_aware_expression(img);
6996 
6997 	if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
6998 	{
6999 		// Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
7000 		farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
7001 	}
7002 
7003 	bool swizz_func = backend.swizzle_is_function;
7004 	auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
7005 		if (comps == in_comps)
7006 			return "";
7007 
7008 		switch (comps)
7009 		{
7010 		case 1:
7011 			return ".x";
7012 		case 2:
7013 			return swizz_func ? ".xy()" : ".xy";
7014 		case 3:
7015 			return swizz_func ? ".xyz()" : ".xyz";
7016 		default:
7017 			return "";
7018 		}
7019 	};
7020 
7021 	bool forward = should_forward(args.coord);
7022 
7023 	// The IR can give us more components than we need, so chop them off as needed.
7024 	auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
7025 	// Only enclose the UV expression if needed.
7026 	auto coord_expr =
7027 	    (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
7028 
7029 	// texelFetch only takes int, not uint.
7030 	auto &coord_type = expression_type(args.coord);
7031 	if (coord_type.basetype == SPIRType::UInt)
7032 	{
7033 		auto expected_type = coord_type;
7034 		expected_type.vecsize = args.coord_components;
7035 		expected_type.basetype = SPIRType::Int;
7036 		coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
7037 	}
7038 
7039 	// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
7040 	// To emulate this, we will have to use textureGrad with a constant gradient of 0.
7041 	// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
7042 	// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
7043 	bool workaround_lod_array_shadow_as_grad =
7044 	    ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
7045 	    image_is_comparison(imgtype, img) && args.lod != 0;
7046 
7047 	if (args.dref)
7048 	{
7049 		forward = forward && should_forward(args.dref);
7050 
7051 		// SPIR-V splits dref and coordinate.
7052 		if (args.base.is_gather ||
7053 		    args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
7054 		{
7055 			farg_str += ", ";
7056 			farg_str += to_expression(args.coord);
7057 			farg_str += ", ";
7058 			farg_str += to_expression(args.dref);
7059 		}
7060 		else if (args.base.is_proj)
7061 		{
7062 			// Have to reshuffle so we get vec4(coord, dref, proj); this is a special case.
7063 			// Other shading languages split up the arguments for coord and compare value like SPIR-V does.
7064 			// The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
7065 			farg_str += ", vec4(";
7066 
7067 			if (imgtype.image.dim == Dim1D)
7068 			{
7069 				// Could reuse coord_expr, but that would mess up the temporary usage checking.
7070 				farg_str += to_enclosed_expression(args.coord) + ".x";
7071 				farg_str += ", ";
7072 				farg_str += "0.0, ";
7073 				farg_str += to_expression(args.dref);
7074 				farg_str += ", ";
7075 				farg_str += to_enclosed_expression(args.coord) + ".y)";
7076 			}
7077 			else if (imgtype.image.dim == Dim2D)
7078 			{
7079 				// Could reuse coord_expr, but that would mess up the temporary usage checking.
7080 				farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
7081 				farg_str += ", ";
7082 				farg_str += to_expression(args.dref);
7083 				farg_str += ", ";
7084 				farg_str += to_enclosed_expression(args.coord) + ".z)";
7085 			}
7086 			else
7087 				SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
7088 		}
7089 		else
7090 		{
7091 			// Create a composite which merges coord/dref into a single vector.
7092 			auto type = expression_type(args.coord);
7093 			type.vecsize = args.coord_components + 1;
7094 			farg_str += ", ";
7095 			farg_str += type_to_glsl_constructor(type);
7096 			farg_str += "(";
7097 			farg_str += coord_expr;
7098 			farg_str += ", ";
7099 			farg_str += to_expression(args.dref);
7100 			farg_str += ")";
7101 		}
7102 	}
7103 	else
7104 	{
7105 		farg_str += ", ";
7106 		farg_str += coord_expr;
7107 	}
7108 
7109 	if (args.grad_x || args.grad_y)
7110 	{
7111 		forward = forward && should_forward(args.grad_x);
7112 		forward = forward && should_forward(args.grad_y);
7113 		farg_str += ", ";
7114 		farg_str += to_expression(args.grad_x);
7115 		farg_str += ", ";
7116 		farg_str += to_expression(args.grad_y);
7117 	}
7118 
7119 	if (args.lod)
7120 	{
7121 		if (workaround_lod_array_shadow_as_grad)
7122 		{
7123 			// Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
7124 			// Implementing this as plain texture() is not safe on some implementations.
7125 			if (imgtype.image.dim == Dim2D)
7126 				farg_str += ", vec2(0.0), vec2(0.0)";
7127 			else if (imgtype.image.dim == DimCube)
7128 				farg_str += ", vec3(0.0), vec3(0.0)";
7129 		}
7130 		else
7131 		{
7132 			forward = forward && should_forward(args.lod);
7133 			farg_str += ", ";
7134 
7135 			auto &lod_expr_type = expression_type(args.lod);
7136 
7137 			// Lod expression for TexelFetch in GLSL must be int, and only int.
7138 			if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
7139 			    lod_expr_type.basetype != SPIRType::Int)
7140 			{
7141 				farg_str += join("int(", to_expression(args.lod), ")");
7142 			}
7143 			else
7144 			{
7145 				farg_str += to_expression(args.lod);
7146 			}
7147 		}
7148 	}
7149 	else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
7150 	{
7151 		// The Lod argument is optional in OpImageFetch, but we require a LOD value, so pick 0 as the default.
7152 		farg_str += ", 0";
7153 	}
7154 
7155 	if (args.coffset)
7156 	{
7157 		forward = forward && should_forward(args.coffset);
7158 		farg_str += ", ";
7159 		farg_str += to_expression(args.coffset);
7160 	}
7161 	else if (args.offset)
7162 	{
7163 		forward = forward && should_forward(args.offset);
7164 		farg_str += ", ";
7165 		farg_str += to_expression(args.offset);
7166 	}
7167 
7168 	if (args.sample)
7169 	{
7170 		farg_str += ", ";
7171 		farg_str += to_expression(args.sample);
7172 	}
7173 
7174 	if (args.min_lod)
7175 	{
7176 		farg_str += ", ";
7177 		farg_str += to_expression(args.min_lod);
7178 	}
7179 
7180 	if (args.sparse_texel)
7181 	{
7182 		// The sparse texel output parameter comes after everything else, except that it precedes the optional component/bias arguments.
7183 		farg_str += ", ";
7184 		farg_str += to_expression(args.sparse_texel);
7185 	}
7186 
7187 	if (args.bias)
7188 	{
7189 		forward = forward && should_forward(args.bias);
7190 		farg_str += ", ";
7191 		farg_str += to_expression(args.bias);
7192 	}
7193 
7194 	if (args.component && !expression_is_constant_null(args.component))
7195 	{
7196 		forward = forward && should_forward(args.component);
7197 		farg_str += ", ";
7198 		auto &component_type = expression_type(args.component);
7199 		if (component_type.basetype == SPIRType::Int)
7200 			farg_str += to_expression(args.component);
7201 		else
7202 			farg_str += join("int(", to_expression(args.component), ")");
7203 	}
7204 
7205 	*p_forward = forward;
7206 
7207 	return farg_str;
7208 }
7209 
7210 void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
7211 {
7212 	auto op = static_cast<GLSLstd450>(eop);
7213 
7214 	if (is_legacy() && is_unsigned_glsl_opcode(op))
7215 		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
7216 
7217 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
7218 	uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
7219 	auto int_type = to_signed_basetype(integer_width);
7220 	auto uint_type = to_unsigned_basetype(integer_width);
7221 
7222 	switch (op)
7223 	{
7224 	// FP fiddling
7225 	case GLSLstd450Round:
7226 		if (!is_legacy())
7227 			emit_unary_func_op(result_type, id, args[0], "round");
7228 		else
7229 		{
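			// round() does not exist in legacy targets; emulate it as floor(x + 0.5).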
7230 			auto op0 = to_enclosed_expression(args[0]);
7231 			auto &op0_type = expression_type(args[0]);
7232 			auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
7233 			bool forward = should_forward(args[0]);
7234 			emit_op(result_type, id, expr, forward);
7235 			inherit_expression_dependencies(id, args[0]);
7236 		}
7237 		break;
7238 
7239 	case GLSLstd450RoundEven:
7240 		if (!is_legacy())
7241 			emit_unary_func_op(result_type, id, args[0], "roundEven");
7242 		else if (!options.es)
7243 		{
7244 			// This extension provides round() with round-to-even semantics.
7245 			require_extension_internal("GL_EXT_gpu_shader4");
7246 			emit_unary_func_op(result_type, id, args[0], "round");
7247 		}
7248 		else
7249 			SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
7250 		break;
7251 
7252 	case GLSLstd450Trunc:
7253 		emit_unary_func_op(result_type, id, args[0], "trunc");
7254 		break;
7255 	case GLSLstd450SAbs:
7256 		emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
7257 		break;
7258 	case GLSLstd450FAbs:
7259 		emit_unary_func_op(result_type, id, args[0], "abs");
7260 		break;
7261 	case GLSLstd450SSign:
7262 		emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
7263 		break;
7264 	case GLSLstd450FSign:
7265 		emit_unary_func_op(result_type, id, args[0], "sign");
7266 		break;
7267 	case GLSLstd450Floor:
7268 		emit_unary_func_op(result_type, id, args[0], "floor");
7269 		break;
7270 	case GLSLstd450Ceil:
7271 		emit_unary_func_op(result_type, id, args[0], "ceil");
7272 		break;
7273 	case GLSLstd450Fract:
7274 		emit_unary_func_op(result_type, id, args[0], "fract");
7275 		break;
7276 	case GLSLstd450Radians:
7277 		emit_unary_func_op(result_type, id, args[0], "radians");
7278 		break;
7279 	case GLSLstd450Degrees:
7280 		emit_unary_func_op(result_type, id, args[0], "degrees");
7281 		break;
7282 	case GLSLstd450Fma:
7283 		if ((!options.es && options.version < 400) || (options.es && options.version < 320))
7284 		{
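			// fma() needs GLSL 400 / ESSL 320; below that, fall back to a plain
			// multiply-add, which drops fma()'s single-rounding guarantee.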
7285 			auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
7286 			                 to_enclosed_expression(args[2]));
7287 
7288 			emit_op(result_type, id, expr,
7289 			        should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
7290 			for (uint32_t i = 0; i < 3; i++)
7291 				inherit_expression_dependencies(id, args[i]);
7292 		}
7293 		else
7294 			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
7295 		break;
7296 	case GLSLstd450Modf:
7297 		register_call_out_argument(args[1]);
7298 		forced_temporaries.insert(id);
7299 		emit_binary_func_op(result_type, id, args[0], args[1], "modf");
7300 		break;
7301 
7302 	case GLSLstd450ModfStruct:
7303 	{
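		// ModfStruct has no direct GLSL counterpart; write both results through an
		// uninitialized temporary, roughly: _tmp.fract = modf(x, _tmp.whole);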
7304 		auto &type = get<SPIRType>(result_type);
7305 		emit_uninitialized_temporary_expression(result_type, id);
7306 		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
7307 		          to_expression(id), ".", to_member_name(type, 1), ");");
7308 		break;
7309 	}
7310 
7311 	// Minmax
7312 	case GLSLstd450UMin:
7313 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
7314 		break;
7315 
7316 	case GLSLstd450SMin:
7317 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
7318 		break;
7319 
7320 	case GLSLstd450FMin:
7321 		emit_binary_func_op(result_type, id, args[0], args[1], "min");
7322 		break;
7323 
7324 	case GLSLstd450FMax:
7325 		emit_binary_func_op(result_type, id, args[0], args[1], "max");
7326 		break;
7327 
7328 	case GLSLstd450UMax:
7329 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
7330 		break;
7331 
7332 	case GLSLstd450SMax:
7333 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
7334 		break;
7335 
7336 	case GLSLstd450FClamp:
7337 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
7338 		break;
7339 
7340 	case GLSLstd450UClamp:
7341 		emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
7342 		break;
7343 
7344 	case GLSLstd450SClamp:
7345 		emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
7346 		break;
7347 
7348 	// Trig
7349 	case GLSLstd450Sin:
7350 		emit_unary_func_op(result_type, id, args[0], "sin");
7351 		break;
7352 	case GLSLstd450Cos:
7353 		emit_unary_func_op(result_type, id, args[0], "cos");
7354 		break;
7355 	case GLSLstd450Tan:
7356 		emit_unary_func_op(result_type, id, args[0], "tan");
7357 		break;
7358 	case GLSLstd450Asin:
7359 		emit_unary_func_op(result_type, id, args[0], "asin");
7360 		break;
7361 	case GLSLstd450Acos:
7362 		emit_unary_func_op(result_type, id, args[0], "acos");
7363 		break;
7364 	case GLSLstd450Atan:
7365 		emit_unary_func_op(result_type, id, args[0], "atan");
7366 		break;
7367 	case GLSLstd450Sinh:
7368 		emit_unary_func_op(result_type, id, args[0], "sinh");
7369 		break;
7370 	case GLSLstd450Cosh:
7371 		emit_unary_func_op(result_type, id, args[0], "cosh");
7372 		break;
7373 	case GLSLstd450Tanh:
7374 		emit_unary_func_op(result_type, id, args[0], "tanh");
7375 		break;
7376 	case GLSLstd450Asinh:
7377 		emit_unary_func_op(result_type, id, args[0], "asinh");
7378 		break;
7379 	case GLSLstd450Acosh:
7380 		emit_unary_func_op(result_type, id, args[0], "acosh");
7381 		break;
7382 	case GLSLstd450Atanh:
7383 		emit_unary_func_op(result_type, id, args[0], "atanh");
7384 		break;
7385 	case GLSLstd450Atan2:
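		// GLSL has no atan2(); the two-argument atan(y, x) overload covers it.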
7386 		emit_binary_func_op(result_type, id, args[0], args[1], "atan");
7387 		break;
7388 
7389 	// Exponentials
7390 	case GLSLstd450Pow:
7391 		emit_binary_func_op(result_type, id, args[0], args[1], "pow");
7392 		break;
7393 	case GLSLstd450Exp:
7394 		emit_unary_func_op(result_type, id, args[0], "exp");
7395 		break;
7396 	case GLSLstd450Log:
7397 		emit_unary_func_op(result_type, id, args[0], "log");
7398 		break;
7399 	case GLSLstd450Exp2:
7400 		emit_unary_func_op(result_type, id, args[0], "exp2");
7401 		break;
7402 	case GLSLstd450Log2:
7403 		emit_unary_func_op(result_type, id, args[0], "log2");
7404 		break;
7405 	case GLSLstd450Sqrt:
7406 		emit_unary_func_op(result_type, id, args[0], "sqrt");
7407 		break;
7408 	case GLSLstd450InverseSqrt:
7409 		emit_unary_func_op(result_type, id, args[0], "inversesqrt");
7410 		break;
7411 
7412 	// Matrix math
7413 	case GLSLstd450Determinant:
7414 		emit_unary_func_op(result_type, id, args[0], "determinant");
7415 		break;
7416 	case GLSLstd450MatrixInverse:
7417 		emit_unary_func_op(result_type, id, args[0], "inverse");
7418 		break;
7419 
7420 	// Lerping
7421 	case GLSLstd450FMix:
7422 	case GLSLstd450IMix:
7423 	{
7424 		emit_mix_op(result_type, id, args[0], args[1], args[2]);
7425 		break;
7426 	}
7427 	case GLSLstd450Step:
7428 		emit_binary_func_op(result_type, id, args[0], args[1], "step");
7429 		break;
7430 	case GLSLstd450SmoothStep:
7431 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
7432 		break;
7433 
7434 	// Packing
7435 	case GLSLstd450Frexp:
7436 		register_call_out_argument(args[1]);
7437 		forced_temporaries.insert(id);
7438 		emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
7439 		break;
7440 
7441 	case GLSLstd450FrexpStruct:
7442 	{
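		// Same pattern as ModfStruct above, roughly: _tmp.significand = frexp(x, _tmp.exponent);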
7443 		auto &type = get<SPIRType>(result_type);
7444 		emit_uninitialized_temporary_expression(result_type, id);
7445 		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
7446 		          to_expression(id), ".", to_member_name(type, 1), ");");
7447 		break;
7448 	}
7449 
7450 	case GLSLstd450Ldexp:
7451 	{
7452 		bool forward = should_forward(args[0]) && should_forward(args[1]);
7453 
7454 		auto op0 = to_unpacked_expression(args[0]);
7455 		auto op1 = to_unpacked_expression(args[1]);
7456 		auto &op1_type = expression_type(args[1]);
7457 		if (op1_type.basetype != SPIRType::Int)
7458 		{
7459 			// Need a value cast here.
7460 			auto target_type = op1_type;
7461 			target_type.basetype = SPIRType::Int;
7462 			op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
7463 		}
7464 
7465 		auto expr = join("ldexp(", op0, ", ", op1, ")");
7466 
7467 		emit_op(result_type, id, expr, forward);
7468 		inherit_expression_dependencies(id, args[0]);
7469 		inherit_expression_dependencies(id, args[1]);
7470 		break;
7471 	}
7472 
7473 	case GLSLstd450PackSnorm4x8:
7474 		emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
7475 		break;
7476 	case GLSLstd450PackUnorm4x8:
7477 		emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
7478 		break;
7479 	case GLSLstd450PackSnorm2x16:
7480 		emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
7481 		break;
7482 	case GLSLstd450PackUnorm2x16:
7483 		emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
7484 		break;
7485 	case GLSLstd450PackHalf2x16:
7486 		emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
7487 		break;
7488 	case GLSLstd450UnpackSnorm4x8:
7489 		emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
7490 		break;
7491 	case GLSLstd450UnpackUnorm4x8:
7492 		emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
7493 		break;
7494 	case GLSLstd450UnpackSnorm2x16:
7495 		emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
7496 		break;
7497 	case GLSLstd450UnpackUnorm2x16:
7498 		emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
7499 		break;
7500 	case GLSLstd450UnpackHalf2x16:
7501 		emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
7502 		break;
7503 
7504 	case GLSLstd450PackDouble2x32:
7505 		emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
7506 		break;
7507 	case GLSLstd450UnpackDouble2x32:
7508 		emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
7509 		break;
7510 
7511 	// Vector math
7512 	case GLSLstd450Length:
7513 		emit_unary_func_op(result_type, id, args[0], "length");
7514 		break;
7515 	case GLSLstd450Distance:
7516 		emit_binary_func_op(result_type, id, args[0], args[1], "distance");
7517 		break;
7518 	case GLSLstd450Cross:
7519 		emit_binary_func_op(result_type, id, args[0], args[1], "cross");
7520 		break;
7521 	case GLSLstd450Normalize:
7522 		emit_unary_func_op(result_type, id, args[0], "normalize");
7523 		break;
7524 	case GLSLstd450FaceForward:
7525 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
7526 		break;
7527 	case GLSLstd450Reflect:
7528 		emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
7529 		break;
7530 	case GLSLstd450Refract:
7531 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
7532 		break;
7533 
7534 	// Bit-fiddling
7535 	case GLSLstd450FindILsb:
7536 		// findLSB always returns int.
7537 		emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
7538 		break;
7539 
7540 	case GLSLstd450FindSMsb:
7541 		emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
7542 		break;
7543 
7544 	case GLSLstd450FindUMsb:
7545 		emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
7546 		                        int_type); // findMSB always returns int.
7547 		break;
7548 
7549 	// Multisampled varying
7550 	case GLSLstd450InterpolateAtCentroid:
7551 		emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
7552 		break;
7553 	case GLSLstd450InterpolateAtSample:
7554 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
7555 		break;
7556 	case GLSLstd450InterpolateAtOffset:
7557 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
7558 		break;
7559 
7560 	case GLSLstd450NMin:
7561 	case GLSLstd450NMax:
7562 	{
7563 		emit_nminmax_op(result_type, id, args[0], args[1], op);
7564 		break;
7565 	}
7566 
7567 	case GLSLstd450NClamp:
7568 	{
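		// NClamp has no native GLSL form with NaN-aware semantics; decompose it as
		// NMin(NMax(x, minVal), maxVal) using the emulated nminmax helper below.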
7569 		// Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between the clamp and NMin sub-ops.
7570 		// IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
7571 		uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
7572 		if (!max_id)
7573 			max_id = ir.increase_bound_by(1);
7574 
7575 		// Inherit precision qualifiers.
7576 		ir.meta[max_id] = ir.meta[id];
7577 
7578 		emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
7579 		emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
7580 		break;
7581 	}
7582 
7583 	default:
7584 		statement("// unimplemented GLSL op ", eop);
7585 		break;
7586 	}
7587 }
7588 
7589 void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
7590 {
7591 	// Need to emulate this call.
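	// SPIR-V NMin/NMax return the non-NaN operand when exactly one input is NaN,
	// while GLSL min()/max() leave NaN behaviour undefined. Roughly, NMin(a, b)
	// is therefore emitted as: mix(mix(min(a, b), b, isnan(a)), a, isnan(b)).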
7592 	uint32_t &ids = extra_sub_expressions[id];
7593 	if (!ids)
7594 	{
7595 		ids = ir.increase_bound_by(5);
7596 		auto btype = get<SPIRType>(result_type);
7597 		btype.basetype = SPIRType::Boolean;
7598 		set<SPIRType>(ids, btype);
7599 	}
7600 
7601 	uint32_t btype_id = ids + 0;
7602 	uint32_t left_nan_id = ids + 1;
7603 	uint32_t right_nan_id = ids + 2;
7604 	uint32_t tmp_id = ids + 3;
7605 	uint32_t mixed_first_id = ids + 4;
7606 
7607 	// Inherit precision qualifiers.
7608 	ir.meta[tmp_id] = ir.meta[id];
7609 	ir.meta[mixed_first_id] = ir.meta[id];
7610 
7611 	emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
7612 	emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
7613 	emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
7614 	emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
7615 	emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
7616 }
7617 
7618 void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7619                                                  uint32_t)
7620 {
7621 	require_extension_internal("GL_AMD_shader_ballot");
7622 
7623 	enum AMDShaderBallot
7624 	{
7625 		SwizzleInvocationsAMD = 1,
7626 		SwizzleInvocationsMaskedAMD = 2,
7627 		WriteInvocationAMD = 3,
7628 		MbcntAMD = 4
7629 	};
7630 
7631 	auto op = static_cast<AMDShaderBallot>(eop);
7632 
7633 	switch (op)
7634 	{
7635 	case SwizzleInvocationsAMD:
7636 		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
7637 		register_control_dependent_expression(id);
7638 		break;
7639 
7640 	case SwizzleInvocationsMaskedAMD:
7641 		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
7642 		register_control_dependent_expression(id);
7643 		break;
7644 
7645 	case WriteInvocationAMD:
7646 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
7647 		register_control_dependent_expression(id);
7648 		break;
7649 
7650 	case MbcntAMD:
7651 		emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
7652 		register_control_dependent_expression(id);
7653 		break;
7654 
7655 	default:
7656 		statement("// unimplemented SPV AMD shader ballot op ", eop);
7657 		break;
7658 	}
7659 }
7660 
7661 void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
7662                                                                     const uint32_t *args, uint32_t)
7663 {
7664 	require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
7665 
7666 	enum AMDShaderExplicitVertexParameter
7667 	{
7668 		InterpolateAtVertexAMD = 1
7669 	};
7670 
7671 	auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
7672 
7673 	switch (op)
7674 	{
7675 	case InterpolateAtVertexAMD:
7676 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
7677 		break;
7678 
7679 	default:
7680 		statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
7681 		break;
7682 	}
7683 }
7684 
7685 void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
7686                                                          const uint32_t *args, uint32_t)
7687 {
7688 	require_extension_internal("GL_AMD_shader_trinary_minmax");
7689 
7690 	enum AMDShaderTrinaryMinMax
7691 	{
7692 		FMin3AMD = 1,
7693 		UMin3AMD = 2,
7694 		SMin3AMD = 3,
7695 		FMax3AMD = 4,
7696 		UMax3AMD = 5,
7697 		SMax3AMD = 6,
7698 		FMid3AMD = 7,
7699 		UMid3AMD = 8,
7700 		SMid3AMD = 9
7701 	};
7702 
7703 	auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
7704 
7705 	switch (op)
7706 	{
7707 	case FMin3AMD:
7708 	case UMin3AMD:
7709 	case SMin3AMD:
7710 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
7711 		break;
7712 
7713 	case FMax3AMD:
7714 	case UMax3AMD:
7715 	case SMax3AMD:
7716 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
7717 		break;
7718 
7719 	case FMid3AMD:
7720 	case UMid3AMD:
7721 	case SMid3AMD:
7722 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
7723 		break;
7724 
7725 	default:
7726 		statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
7727 		break;
7728 	}
7729 }
7730 
7731 void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7732                                               uint32_t)
7733 {
7734 	require_extension_internal("GL_AMD_gcn_shader");
7735 
7736 	enum AMDGCNShader
7737 	{
7738 		CubeFaceIndexAMD = 1,
7739 		CubeFaceCoordAMD = 2,
7740 		TimeAMD = 3
7741 	};
7742 
7743 	auto op = static_cast<AMDGCNShader>(eop);
7744 
7745 	switch (op)
7746 	{
7747 	case CubeFaceIndexAMD:
7748 		emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
7749 		break;
7750 	case CubeFaceCoordAMD:
7751 		emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
7752 		break;
7753 	case TimeAMD:
7754 	{
7755 		string expr = "timeAMD()";
7756 		emit_op(result_type, id, expr, true);
7757 		register_control_dependent_expression(id);
7758 		break;
7759 	}
7760 
7761 	default:
7762 		statement("// unimplemented SPV AMD gcn shader op ", eop);
7763 		break;
7764 	}
7765 }
7766 
7767 void CompilerGLSL::emit_subgroup_op(const Instruction &i)
7768 {
7769 	const uint32_t *ops = stream(i);
7770 	auto op = static_cast<Op>(i.op);
7771 
7772 	if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
7773 		SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
7774 
7775 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
7776 	uint32_t integer_width = get_integer_width_for_instruction(i);
7777 	auto int_type = to_signed_basetype(integer_width);
7778 	auto uint_type = to_unsigned_basetype(integer_width);
7779 
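	// First pass: request any extensions or helper declarations the op will need.
	// The actual expression is emitted in the second switch further down.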
7780 	switch (op)
7781 	{
7782 	case OpGroupNonUniformElect:
7783 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
7784 		break;
7785 
7786 	case OpGroupNonUniformBallotBitCount:
7787 	{
7788 		const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
7789 		if (operation == GroupOperationReduce)
7790 			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
7791 		else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
7792 			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7793 	}
7794 	break;
7795 
7796 	case OpGroupNonUniformBallotBitExtract:
7797 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
7798 		break;
7799 
7800 	case OpGroupNonUniformInverseBallot:
7801 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7802 		break;
7803 
7804 	case OpGroupNonUniformBallot:
7805 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
7806 		break;
7807 
7808 	case OpGroupNonUniformBallotFindLSB:
7809 	case OpGroupNonUniformBallotFindMSB:
7810 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
7811 		break;
7812 
7813 	case OpGroupNonUniformBroadcast:
7814 	case OpGroupNonUniformBroadcastFirst:
7815 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
7816 		break;
7817 
7818 	case OpGroupNonUniformShuffle:
7819 	case OpGroupNonUniformShuffleXor:
7820 		require_extension_internal("GL_KHR_shader_subgroup_shuffle");
7821 		break;
7822 
7823 	case OpGroupNonUniformShuffleUp:
7824 	case OpGroupNonUniformShuffleDown:
7825 		require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
7826 		break;
7827 
7828 	case OpGroupNonUniformAll:
7829 	case OpGroupNonUniformAny:
7830 	case OpGroupNonUniformAllEqual:
7831 	{
7832 		const SPIRType &type = expression_type(ops[3]);
7833 		if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
7834 			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
7835 		else
7836 			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
7837 	}
7838 	break;
7839 
7840 	case OpGroupNonUniformFAdd:
7841 	case OpGroupNonUniformFMul:
7842 	case OpGroupNonUniformFMin:
7843 	case OpGroupNonUniformFMax:
7844 	case OpGroupNonUniformIAdd:
7845 	case OpGroupNonUniformIMul:
7846 	case OpGroupNonUniformSMin:
7847 	case OpGroupNonUniformSMax:
7848 	case OpGroupNonUniformUMin:
7849 	case OpGroupNonUniformUMax:
7850 	case OpGroupNonUniformBitwiseAnd:
7851 	case OpGroupNonUniformBitwiseOr:
7852 	case OpGroupNonUniformBitwiseXor:
7853 	case OpGroupNonUniformLogicalAnd:
7854 	case OpGroupNonUniformLogicalOr:
7855 	case OpGroupNonUniformLogicalXor:
7856 	{
7857 		auto operation = static_cast<GroupOperation>(ops[3]);
7858 		if (operation == GroupOperationClusteredReduce)
7859 		{
7860 			require_extension_internal("GL_KHR_shader_subgroup_clustered");
7861 		}
7862 		else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
7863 		         operation == GroupOperationReduce)
7864 		{
7865 			require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
7866 		}
7867 		else
7868 			SPIRV_CROSS_THROW("Invalid group operation.");
7869 		break;
7870 	}
7871 
7872 	case OpGroupNonUniformQuadSwap:
7873 	case OpGroupNonUniformQuadBroadcast:
7874 		require_extension_internal("GL_KHR_shader_subgroup_quad");
7875 		break;
7876 
7877 	default:
7878 		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
7879 	}
7880 
7881 	uint32_t result_type = ops[0];
7882 	uint32_t id = ops[1];
7883 
7884 	auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
7885 	if (scope != ScopeSubgroup)
7886 		SPIRV_CROSS_THROW("Only subgroup scope is supported.");
7887 
7888 	switch (op)
7889 	{
7890 	case OpGroupNonUniformElect:
7891 		emit_op(result_type, id, "subgroupElect()", true);
7892 		break;
7893 
7894 	case OpGroupNonUniformBroadcast:
7895 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
7896 		break;
7897 
7898 	case OpGroupNonUniformBroadcastFirst:
7899 		emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
7900 		break;
7901 
7902 	case OpGroupNonUniformBallot:
7903 		emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
7904 		break;
7905 
7906 	case OpGroupNonUniformInverseBallot:
7907 		emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
7908 		break;
7909 
7910 	case OpGroupNonUniformBallotBitExtract:
7911 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
7912 		break;
7913 
7914 	case OpGroupNonUniformBallotFindLSB:
7915 		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
7916 		break;
7917 
7918 	case OpGroupNonUniformBallotFindMSB:
7919 		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
7920 		break;
7921 
7922 	case OpGroupNonUniformBallotBitCount:
7923 	{
7924 		auto operation = static_cast<GroupOperation>(ops[3]);
7925 		if (operation == GroupOperationReduce)
7926 			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
7927 		else if (operation == GroupOperationInclusiveScan)
7928 			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
7929 		else if (operation == GroupOperationExclusiveScan)
7930 			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
7931 		else
7932 			SPIRV_CROSS_THROW("Invalid BitCount operation.");
7933 		break;
7934 	}
7935 
7936 	case OpGroupNonUniformShuffle:
7937 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
7938 		break;
7939 
7940 	case OpGroupNonUniformShuffleXor:
7941 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
7942 		break;
7943 
7944 	case OpGroupNonUniformShuffleUp:
7945 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
7946 		break;
7947 
7948 	case OpGroupNonUniformShuffleDown:
7949 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
7950 		break;
7951 
7952 	case OpGroupNonUniformAll:
7953 		emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
7954 		break;
7955 
7956 	case OpGroupNonUniformAny:
7957 		emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
7958 		break;
7959 
7960 	case OpGroupNonUniformAllEqual:
7961 		emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
7962 		break;
7963 
7964 		// clang-format off
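// These macros map a SPIR-V group arithmetic op onto the GLSL subgroup built-ins:
//   GroupOperationReduce          -> subgroup<Op>(value)
//   GroupOperationInclusiveScan   -> subgroupInclusive<Op>(value)
//   GroupOperationExclusiveScan   -> subgroupExclusive<Op>(value)
//   GroupOperationClusteredReduce -> subgroupClustered<Op>(value, clusterSize)
// e.g. OpGroupNonUniformFAdd with Reduce becomes subgroupAdd(x).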
7965 #define GLSL_GROUP_OP(op, glsl_op) \
7966 case OpGroupNonUniform##op: \
7967 	{ \
7968 		auto operation = static_cast<GroupOperation>(ops[3]); \
7969 		if (operation == GroupOperationReduce) \
7970 			emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
7971 		else if (operation == GroupOperationInclusiveScan) \
7972 			emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
7973 		else if (operation == GroupOperationExclusiveScan) \
7974 			emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
7975 		else if (operation == GroupOperationClusteredReduce) \
7976 			emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
7977 		else \
7978 			SPIRV_CROSS_THROW("Invalid group operation."); \
7979 		break; \
7980 	}
7981 
7982 #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
7983 case OpGroupNonUniform##op: \
7984 	{ \
7985 		auto operation = static_cast<GroupOperation>(ops[3]); \
7986 		if (operation == GroupOperationReduce) \
7987 			emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
7988 		else if (operation == GroupOperationInclusiveScan) \
7989 			emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
7990 		else if (operation == GroupOperationExclusiveScan) \
7991 			emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
7992 		else if (operation == GroupOperationClusteredReduce) \
7993 			emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
7994 		else \
7995 			SPIRV_CROSS_THROW("Invalid group operation."); \
7996 		break; \
7997 	}
7998 
7999 	GLSL_GROUP_OP(FAdd, Add)
8000 	GLSL_GROUP_OP(FMul, Mul)
8001 	GLSL_GROUP_OP(FMin, Min)
8002 	GLSL_GROUP_OP(FMax, Max)
8003 	GLSL_GROUP_OP(IAdd, Add)
8004 	GLSL_GROUP_OP(IMul, Mul)
8005 	GLSL_GROUP_OP_CAST(SMin, Min, int_type)
8006 	GLSL_GROUP_OP_CAST(SMax, Max, int_type)
8007 	GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
8008 	GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
8009 	GLSL_GROUP_OP(BitwiseAnd, And)
8010 	GLSL_GROUP_OP(BitwiseOr, Or)
8011 	GLSL_GROUP_OP(BitwiseXor, Xor)
8012 	GLSL_GROUP_OP(LogicalAnd, And)
8013 	GLSL_GROUP_OP(LogicalOr, Or)
8014 	GLSL_GROUP_OP(LogicalXor, Xor)
8015 #undef GLSL_GROUP_OP
8016 #undef GLSL_GROUP_OP_CAST
8017 		// clang-format on
8018 
8019 	case OpGroupNonUniformQuadSwap:
8020 	{
8021 		uint32_t direction = evaluate_constant_u32(ops[4]);
8022 		if (direction == 0)
8023 			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
8024 		else if (direction == 1)
8025 			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
8026 		else if (direction == 2)
8027 			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
8028 		else
8029 			SPIRV_CROSS_THROW("Invalid quad swap direction.");
8030 		break;
8031 	}
8032 
8033 	case OpGroupNonUniformQuadBroadcast:
8034 	{
8035 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
8036 		break;
8037 	}
8038 
8039 	default:
8040 		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
8041 	}
8042 
8043 	register_control_dependent_expression(id);
8044 }
8045 
8046 string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
8047 {
8048 	// OpBitcast can deal with pointers.
8049 	if (out_type.pointer || in_type.pointer)
8050 		return type_to_glsl(out_type);
8051 
8052 	if (out_type.basetype == in_type.basetype)
8053 		return "";
8054 
8055 	assert(out_type.basetype != SPIRType::Boolean);
8056 	assert(in_type.basetype != SPIRType::Boolean);
8057 
8058 	bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
8059 	bool same_size_cast = out_type.width == in_type.width;
8060 
8061 	// Trivial bitcast case, casts between integers.
8062 	if (integral_cast && same_size_cast)
8063 		return type_to_glsl(out_type);
8064 
8065 	// Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
8066 	if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
8067 		return "unpack8";
8068 	else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
8069 		return "pack16";
8070 	else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
8071 		return "pack32";
8072 
8073 	// Floating <-> Integer special casts. Just have to enumerate all cases. :(
8074 	// 16-bit, 32-bit and 64-bit floats.
8075 	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
8076 	{
8077 		if (is_legacy_es())
8078 			SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
8079 		else if (!options.es && options.version < 330)
8080 			require_extension_internal("GL_ARB_shader_bit_encoding");
8081 		return "floatBitsToUint";
8082 	}
8083 	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
8084 	{
8085 		if (is_legacy_es())
8086 			SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
8087 		else if (!options.es && options.version < 330)
8088 			require_extension_internal("GL_ARB_shader_bit_encoding");
8089 		return "floatBitsToInt";
8090 	}
8091 	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
8092 	{
8093 		if (is_legacy_es())
8094 			SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
8095 		else if (!options.es && options.version < 330)
8096 			require_extension_internal("GL_ARB_shader_bit_encoding");
8097 		return "uintBitsToFloat";
8098 	}
8099 	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
8100 	{
8101 		if (is_legacy_es())
8102 			SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
8103 		else if (!options.es && options.version < 330)
8104 			require_extension_internal("GL_ARB_shader_bit_encoding");
8105 		return "intBitsToFloat";
8106 	}
8107 
8108 	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
8109 		return "doubleBitsToInt64";
8110 	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
8111 		return "doubleBitsToUint64";
8112 	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
8113 		return "int64BitsToDouble";
8114 	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
8115 		return "uint64BitsToDouble";
8116 	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
8117 		return "float16BitsToInt16";
8118 	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
8119 		return "float16BitsToUint16";
8120 	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
8121 		return "int16BitsToFloat16";
8122 	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
8123 		return "uint16BitsToFloat16";
8124 
8125 	// And finally, some even more special purpose casts.
8126 	if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
8127 		return "packUint2x32";
8128 	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
8129 		return "unpackUint2x32";
8130 	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
8131 		return "unpackFloat2x16";
8132 	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
8133 		return "packFloat2x16";
8134 	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
8135 		return "packInt2x16";
8136 	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
8137 		return "unpackInt2x16";
8138 	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
8139 		return "packUint2x16";
8140 	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
8141 		return "unpackUint2x16";
8142 	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
8143 		return "packInt4x16";
8144 	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
8145 		return "unpackInt4x16";
8146 	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
8147 		return "packUint4x16";
8148 	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
8149 		return "unpackUint4x16";
8150 
8151 	return "";
8152 }
8153 
8154 string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
8155 {
8156 	auto op = bitcast_glsl_op(result_type, expression_type(argument));
8157 	if (op.empty())
8158 		return to_enclosed_unpacked_expression(argument);
8159 	else
8160 		return join(op, "(", to_unpacked_expression(argument), ")");
8161 }
8162 
8163 std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
8164 {
8165 	auto expr = to_expression(arg);
8166 	auto &src_type = expression_type(arg);
8167 	if (src_type.basetype != target_type)
8168 	{
8169 		auto target = src_type;
8170 		target.basetype = target_type;
8171 		expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
8172 	}
8173 
8174 	return expr;
8175 }
8176 
8177 std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
8178                                              const std::string &expr)
8179 {
8180 	if (target_type.basetype == expr_type)
8181 		return expr;
8182 
8183 	auto src_type = target_type;
8184 	src_type.basetype = expr_type;
8185 	return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
8186 }
8187 
8188 string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
8189 {
8190 	switch (builtin)
8191 	{
8192 	case BuiltInPosition:
8193 		return "gl_Position";
8194 	case BuiltInPointSize:
8195 		return "gl_PointSize";
8196 	case BuiltInClipDistance:
8197 		return "gl_ClipDistance";
8198 	case BuiltInCullDistance:
8199 		return "gl_CullDistance";
8200 	case BuiltInVertexId:
8201 		if (options.vulkan_semantics)
8202 			SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
8203 			                  "with GL semantics.");
8204 		return "gl_VertexID";
8205 	case BuiltInInstanceId:
8206 		if (options.vulkan_semantics)
8207 		{
8208 			auto model = get_entry_point().model;
8209 			switch (model)
8210 			{
8211 			case spv::ExecutionModelIntersectionKHR:
8212 			case spv::ExecutionModelAnyHitKHR:
8213 			case spv::ExecutionModelClosestHitKHR:
8214 				// gl_InstanceID is allowed in these shaders.
8215 				break;
8216 
8217 			default:
8218 				SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
8219 				                  "created with GL semantics.");
8220 			}
8221 		}
8222 		if (!options.es && options.version < 140)
8223 		{
8224 			require_extension_internal("GL_ARB_draw_instanced");
8225 		}
8226 		return "gl_InstanceID";
8227 	case BuiltInVertexIndex:
8228 		if (options.vulkan_semantics)
8229 			return "gl_VertexIndex";
8230 		else
8231 			return "gl_VertexID"; // gl_VertexID already has the base offset applied.
8232 	case BuiltInInstanceIndex:
8233 		if (options.vulkan_semantics)
8234 			return "gl_InstanceIndex";
8235 
8236 		if (!options.es && options.version < 140)
8237 		{
8238 			require_extension_internal("GL_ARB_draw_instanced");
8239 		}
8240 
8241 		if (options.vertex.support_nonzero_base_instance)
8242 		{
8243 			if (!options.vulkan_semantics)
8244 			{
8245 				// This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
8246 				require_extension_internal("GL_ARB_shader_draw_parameters");
8247 			}
8248 			return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
8249 		}
8250 		else
8251 			return "gl_InstanceID";
8252 	case BuiltInPrimitiveId:
8253 		if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
8254 			return "gl_PrimitiveIDIn";
8255 		else
8256 			return "gl_PrimitiveID";
8257 	case BuiltInInvocationId:
8258 		return "gl_InvocationID";
8259 	case BuiltInLayer:
8260 		return "gl_Layer";
8261 	case BuiltInViewportIndex:
8262 		return "gl_ViewportIndex";
8263 	case BuiltInTessLevelOuter:
8264 		return "gl_TessLevelOuter";
8265 	case BuiltInTessLevelInner:
8266 		return "gl_TessLevelInner";
8267 	case BuiltInTessCoord:
8268 		return "gl_TessCoord";
8269 	case BuiltInFragCoord:
8270 		return "gl_FragCoord";
8271 	case BuiltInPointCoord:
8272 		return "gl_PointCoord";
8273 	case BuiltInFrontFacing:
8274 		return "gl_FrontFacing";
8275 	case BuiltInFragDepth:
8276 		return "gl_FragDepth";
8277 	case BuiltInNumWorkgroups:
8278 		return "gl_NumWorkGroups";
8279 	case BuiltInWorkgroupSize:
8280 		return "gl_WorkGroupSize";
8281 	case BuiltInWorkgroupId:
8282 		return "gl_WorkGroupID";
8283 	case BuiltInLocalInvocationId:
8284 		return "gl_LocalInvocationID";
8285 	case BuiltInGlobalInvocationId:
8286 		return "gl_GlobalInvocationID";
8287 	case BuiltInLocalInvocationIndex:
8288 		return "gl_LocalInvocationIndex";
8289 	case BuiltInHelperInvocation:
8290 		return "gl_HelperInvocation";
8291 
8292 	case BuiltInBaseVertex:
8293 		if (options.es)
8294 			SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
8295 
8296 		if (options.vulkan_semantics)
8297 		{
8298 			if (options.version < 460)
8299 			{
8300 				require_extension_internal("GL_ARB_shader_draw_parameters");
8301 				return "gl_BaseVertexARB";
8302 			}
8303 			return "gl_BaseVertex";
8304 		}
8305 		// On regular GL, this is soft-enabled and we emit ifdefs in code.
8306 		require_extension_internal("GL_ARB_shader_draw_parameters");
8307 		return "SPIRV_Cross_BaseVertex";
8308 
8309 	case BuiltInBaseInstance:
8310 		if (options.es)
8311 			SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
8312 
8313 		if (options.vulkan_semantics)
8314 		{
8315 			if (options.version < 460)
8316 			{
8317 				require_extension_internal("GL_ARB_shader_draw_parameters");
8318 				return "gl_BaseInstanceARB";
8319 			}
8320 			return "gl_BaseInstance";
8321 		}
8322 		// On regular GL, this is soft-enabled and we emit ifdefs in code.
8323 		require_extension_internal("GL_ARB_shader_draw_parameters");
8324 		return "SPIRV_Cross_BaseInstance";
8325 
8326 	case BuiltInDrawIndex:
8327 		if (options.es)
8328 			SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
8329 
8330 		if (options.vulkan_semantics)
8331 		{
8332 			if (options.version < 460)
8333 			{
8334 				require_extension_internal("GL_ARB_shader_draw_parameters");
8335 				return "gl_DrawIDARB";
8336 			}
8337 			return "gl_DrawID";
8338 		}
8339 		// On regular GL, this is soft-enabled and we emit ifdefs in code.
8340 		require_extension_internal("GL_ARB_shader_draw_parameters");
8341 		return "gl_DrawIDARB";
8342 
8343 	case BuiltInSampleId:
8344 		if (options.es && options.version < 320)
8345 			require_extension_internal("GL_OES_sample_variables");
8346 		if (!options.es && options.version < 400)
8347 			SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
8348 		return "gl_SampleID";
8349 
8350 	case BuiltInSampleMask:
8351 		if (options.es && options.version < 320)
8352 			require_extension_internal("GL_OES_sample_variables");
8353 		if (!options.es && options.version < 400)
8354 			SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
8355 
8356 		if (storage == StorageClassInput)
8357 			return "gl_SampleMaskIn";
8358 		else
8359 			return "gl_SampleMask";
8360 
8361 	case BuiltInSamplePosition:
8362 		if (options.es && options.version < 320)
8363 			require_extension_internal("GL_OES_sample_variables");
8364 		if (!options.es && options.version < 400)
8365 			SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
8366 		return "gl_SamplePosition";
8367 
8368 	case BuiltInViewIndex:
8369 		if (options.vulkan_semantics)
8370 			return "gl_ViewIndex";
8371 		else
8372 			return "gl_ViewID_OVR";
8373 
8374 	case BuiltInNumSubgroups:
8375 		request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
8376 		return "gl_NumSubgroups";
8377 
8378 	case BuiltInSubgroupId:
8379 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
8380 		return "gl_SubgroupID";
8381 
8382 	case BuiltInSubgroupSize:
8383 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
8384 		return "gl_SubgroupSize";
8385 
8386 	case BuiltInSubgroupLocalInvocationId:
8387 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
8388 		return "gl_SubgroupInvocationID";
8389 
8390 	case BuiltInSubgroupEqMask:
8391 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8392 		return "gl_SubgroupEqMask";
8393 
8394 	case BuiltInSubgroupGeMask:
8395 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8396 		return "gl_SubgroupGeMask";
8397 
8398 	case BuiltInSubgroupGtMask:
8399 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8400 		return "gl_SubgroupGtMask";
8401 
8402 	case BuiltInSubgroupLeMask:
8403 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8404 		return "gl_SubgroupLeMask";
8405 
8406 	case BuiltInSubgroupLtMask:
8407 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8408 		return "gl_SubgroupLtMask";
8409 
8410 	case BuiltInLaunchIdKHR:
8411 		return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
8412 	case BuiltInLaunchSizeKHR:
8413 		return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
8414 	case BuiltInWorldRayOriginKHR:
8415 		return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
8416 	case BuiltInWorldRayDirectionKHR:
8417 		return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
8418 	case BuiltInObjectRayOriginKHR:
8419 		return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
8420 	case BuiltInObjectRayDirectionKHR:
8421 		return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
8422 	case BuiltInRayTminKHR:
8423 		return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
8424 	case BuiltInRayTmaxKHR:
8425 		return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
8426 	case BuiltInInstanceCustomIndexKHR:
8427 		return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
8428 	case BuiltInObjectToWorldKHR:
8429 		return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
8430 	case BuiltInWorldToObjectKHR:
8431 		return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
8432 	case BuiltInHitTNV:
8433 		// gl_HitTEXT is an alias of RayTMax in KHR.
8434 		return "gl_HitTNV";
8435 	case BuiltInHitKindKHR:
8436 		return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
8437 	case BuiltInIncomingRayFlagsKHR:
8438 		return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
8439 
8440 	case BuiltInBaryCoordNV:
8441 	{
8442 		if (options.es && options.version < 320)
8443 			SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
8444 		else if (!options.es && options.version < 450)
8445 			SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
8446 		require_extension_internal("GL_NV_fragment_shader_barycentric");
8447 		return "gl_BaryCoordNV";
8448 	}
8449 
8450 	case BuiltInBaryCoordNoPerspNV:
8451 	{
8452 		if (options.es && options.version < 320)
8453 			SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
8454 		else if (!options.es && options.version < 450)
8455 			SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
8456 		require_extension_internal("GL_NV_fragment_shader_barycentric");
8457 		return "gl_BaryCoordNoPerspNV";
8458 	}
8459 
8460 	case BuiltInFragStencilRefEXT:
8461 	{
8462 		if (!options.es)
8463 		{
8464 			require_extension_internal("GL_ARB_shader_stencil_export");
8465 			return "gl_FragStencilRefARB";
8466 		}
8467 		else
8468 			SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
8469 	}
8470 
8471 	case BuiltInPrimitiveShadingRateKHR:
8472 	{
8473 		if (!options.vulkan_semantics)
8474 			SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
8475 		require_extension_internal("GL_EXT_fragment_shading_rate");
8476 		return "gl_PrimitiveShadingRateEXT";
8477 	}
8478 
8479 	case BuiltInShadingRateKHR:
8480 	{
8481 		if (!options.vulkan_semantics)
8482 			SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
8483 		require_extension_internal("GL_EXT_fragment_shading_rate");
8484 		return "gl_ShadingRateEXT";
8485 	}
8486 
8487 	case BuiltInDeviceIndex:
8488 		if (!options.vulkan_semantics)
8489 			SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
8490 		require_extension_internal("GL_EXT_device_group");
8491 		return "gl_DeviceIndex";
8492 
8493 	case BuiltInFullyCoveredEXT:
8494 		if (!options.es)
8495 			require_extension_internal("GL_NV_conservative_raster_underestimation");
8496 		else
8497 			SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
8498 		return "gl_FragFullyCoveredNV";
8499 
8500 	default:
8501 		return join("gl_BuiltIn_", convert_to_string(builtin));
8502 	}
8503 }
8504 
8505 const char *CompilerGLSL::index_to_swizzle(uint32_t index)
8506 {
8507 	switch (index)
8508 	{
8509 	case 0:
8510 		return "x";
8511 	case 1:
8512 		return "y";
8513 	case 2:
8514 		return "z";
8515 	case 3:
8516 		return "w";
8517 	default:
8518 		SPIRV_CROSS_THROW("Swizzle index out of range");
8519 	}
8520 }
8521 
8522 void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
8523                                                       AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
8524                                                       uint32_t index)
8525 {
8526 	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8527 	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8528 
8529 	expr += "[";
8530 
8531 	if (index_is_literal)
8532 		expr += convert_to_string(index);
8533 	else
8534 		expr += to_expression(index, register_expression_read);
8535 
8536 	expr += "]";
8537 }
8538 
8539 bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
8540 {
8541 	return true;
8542 }
8543 
8544 string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
8545                                            AccessChainFlags flags, AccessChainMeta *meta)
8546 {
8547 	string expr;
8548 
8549 	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8550 	bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
8551 	bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
8552 	bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
8553 	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8554 	bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
8555 
8556 	if (!chain_only)
8557 	{
8558 		// We handle transpose explicitly, so don't resolve that here.
8559 		auto *e = maybe_get<SPIRExpression>(base);
8560 		bool old_transpose = e && e->need_transpose;
8561 		if (e)
8562 			e->need_transpose = false;
8563 		expr = to_enclosed_expression(base, register_expression_read);
8564 		if (e)
8565 			e->need_transpose = old_transpose;
8566 	}
8567 
8568 	// Start traversing type hierarchy at the proper non-pointer types,
8569 	// but keep type_id referencing the original pointer for use below.
8570 	uint32_t type_id = expression_type_id(base);
8571 
8572 	if (!backend.native_pointers)
8573 	{
8574 		if (ptr_chain)
8575 			SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
8576 
8577 		// Wrapped buffer reference pointer types will need to poke into the internal "value" member before
8578 		// continuing the access chain.
8579 		if (should_dereference(base))
8580 		{
8581 			auto &type = get<SPIRType>(type_id);
8582 			expr = dereference_expression(type, expr);
8583 		}
8584 	}
8585 
8586 	const auto *type = &get_pointee_type(type_id);
8587 
8588 	bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
8589 	bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
8590 	bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
8591 	uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
8592 	bool is_invariant = has_decoration(base, DecorationInvariant);
8593 	bool pending_array_enclose = false;
8594 	bool dimension_flatten = false;
8595 
8596 	const auto append_index = [&](uint32_t index, bool is_literal) {
8597 		AccessChainFlags mod_flags = flags;
8598 		if (!is_literal)
8599 			mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
8600 		access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
8601 	};
8602 
8603 	for (uint32_t i = 0; i < count; i++)
8604 	{
8605 		uint32_t index = indices[i];
8606 
8607 		bool is_literal = index_is_literal;
8608 		if (is_literal && msb_is_id && (index >> 31u) != 0u)
8609 		{
8610 			is_literal = false;
8611 			index &= 0x7fffffffu;
8612 		}
8613 
8614 		// Pointer chains
8615 		if (ptr_chain && i == 0)
8616 		{
8617 			// If we are flattening multidimensional arrays, only create opening bracket on first
8618 			// array index.
8619 			if (options.flatten_multidimensional_arrays)
8620 			{
8621 				dimension_flatten = type->array.size() >= 1;
8622 				pending_array_enclose = dimension_flatten;
8623 				if (pending_array_enclose)
8624 					expr += "[";
8625 			}
8626 
8627 			if (options.flatten_multidimensional_arrays && dimension_flatten)
8628 			{
8629 				// If we are flattening multidimensional arrays, do manual stride computation.
8630 				if (is_literal)
8631 					expr += convert_to_string(index);
8632 				else
8633 					expr += to_enclosed_expression(index, register_expression_read);
8634 
8635 				for (auto j = uint32_t(type->array.size()); j; j--)
8636 				{
8637 					expr += " * ";
8638 					expr += enclose_expression(to_array_size(*type, j - 1));
8639 				}
8640 
8641 				if (type->array.empty())
8642 					pending_array_enclose = false;
8643 				else
8644 					expr += " + ";
8645 
8646 				if (!pending_array_enclose)
8647 					expr += "]";
8648 			}
8649 			else
8650 			{
8651 				append_index(index, is_literal);
8652 			}
8653 
8654 			if (type->basetype == SPIRType::ControlPointArray)
8655 			{
8656 				type_id = type->parent_type;
8657 				type = &get<SPIRType>(type_id);
8658 			}
8659 
8660 			access_chain_is_arrayed = true;
8661 		}
8662 		// Arrays
8663 		else if (!type->array.empty())
8664 		{
8665 			// If we are flattening multidimensional arrays, only create opening bracket on first
8666 			// array index.
8667 			if (options.flatten_multidimensional_arrays && !pending_array_enclose)
8668 			{
8669 				dimension_flatten = type->array.size() > 1;
8670 				pending_array_enclose = dimension_flatten;
8671 				if (pending_array_enclose)
8672 					expr += "[";
8673 			}
8674 
8675 			assert(type->parent_type);
8676 
8677 			auto *var = maybe_get<SPIRVariable>(base);
8678 			if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
8679 			    !has_decoration(type->self, DecorationBlock))
8680 			{
8681 				// This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
8682 				// Normally, these variables live in blocks when compiled from GLSL,
8683 				// but HLSL seems to just emit straight arrays here.
8684 				// We must pretend this access goes through gl_in/gl_out arrays
8685 				// to be able to access certain builtins as arrays.
8686 				auto builtin = ir.meta[base].decoration.builtin_type;
8687 				switch (builtin)
8688 				{
8689 				// case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
8690 				// case BuiltInClipDistance:
8691 				case BuiltInPosition:
8692 				case BuiltInPointSize:
8693 					if (var->storage == StorageClassInput)
8694 						expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
8695 					else if (var->storage == StorageClassOutput)
8696 						expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
8697 					else
8698 						append_index(index, is_literal);
8699 					break;
8700 
8701 				default:
8702 					append_index(index, is_literal);
8703 					break;
8704 				}
8705 			}
8706 			else if (options.flatten_multidimensional_arrays && dimension_flatten)
8707 			{
8708 				// If we are flattening multidimensional arrays, do manual stride computation.
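				// e.g. a declaration float a[M][N] accessed as a[i][j] is emitted
				// against the flattened array as a[i * N + j].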
8709 				auto &parent_type = get<SPIRType>(type->parent_type);
8710 
8711 				if (is_literal)
8712 					expr += convert_to_string(index);
8713 				else
8714 					expr += to_enclosed_expression(index, register_expression_read);
8715 
8716 				for (auto j = uint32_t(parent_type.array.size()); j; j--)
8717 				{
8718 					expr += " * ";
8719 					expr += enclose_expression(to_array_size(parent_type, j - 1));
8720 				}
8721 
8722 				if (parent_type.array.empty())
8723 					pending_array_enclose = false;
8724 				else
8725 					expr += " + ";
8726 
8727 				if (!pending_array_enclose)
8728 					expr += "]";
8729 			}
8730 			// Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
8731 			// By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
8732 			else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
8733 			{
8734 				append_index(index, is_literal);
8735 			}
8736 
8737 			type_id = type->parent_type;
8738 			type = &get<SPIRType>(type_id);
8739 
8740 			access_chain_is_arrayed = true;
8741 		}
8742 		// For structs, the index refers to a constant, which indexes into the members.
8743 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
8744 		else if (type->basetype == SPIRType::Struct)
8745 		{
8746 			if (!is_literal)
8747 				index = evaluate_constant_u32(index);
8748 
8749 			if (index >= type->member_types.size())
8750 				SPIRV_CROSS_THROW("Member index is out of bounds!");
8751 
8752 			BuiltIn builtin;
8753 			if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base))
8754 			{
8755 				if (access_chain_is_arrayed)
8756 				{
8757 					expr += ".";
8758 					expr += builtin_to_glsl(builtin, type->storage);
8759 				}
8760 				else
8761 					expr = builtin_to_glsl(builtin, type->storage);
8762 			}
8763 			else
8764 			{
8765 				// If the member has a qualified name, use it as the entire chain
8766 				string qual_mbr_name = get_member_qualified_name(type_id, index);
8767 				if (!qual_mbr_name.empty())
8768 					expr = qual_mbr_name;
8769 				else if (flatten_member_reference)
8770 					expr += join("_", to_member_name(*type, index));
8771 				else
8772 					expr += to_member_reference(base, *type, index, ptr_chain);
8773 			}
8774 
8775 			if (has_member_decoration(type->self, index, DecorationInvariant))
8776 				is_invariant = true;
8777 
8778 			is_packed = member_is_packed_physical_type(*type, index);
8779 			if (member_is_remapped_physical_type(*type, index))
8780 				physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
8781 			else
8782 				physical_type = 0;
8783 
8784 			row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
8785 			type = &get<SPIRType>(type->member_types[index]);
8786 		}
8787 		// Matrix -> Vector
8788 		else if (type->columns > 1)
8789 		{
8790 			// If we have a row-major matrix here, we need to defer any transpose in case this access chain
8791 			// is used to store a column. We can resolve it right here and now if we access a scalar directly,
8792 			// by flipping indexing order of the matrix.
8793 
8794 			expr += "[";
8795 			if (is_literal)
8796 				expr += convert_to_string(index);
8797 			else
8798 				expr += to_expression(index, register_expression_read);
8799 			expr += "]";
8800 
8801 			type_id = type->parent_type;
8802 			type = &get<SPIRType>(type_id);
8803 		}
8804 		// Vector -> Scalar
8805 		else if (type->vecsize > 1)
8806 		{
8807 			string deferred_index;
8808 			if (row_major_matrix_needs_conversion)
8809 			{
8810 				// Flip indexing order.
8811 				auto column_index = expr.find_last_of('[');
8812 				if (column_index != string::npos)
8813 				{
8814 					deferred_index = expr.substr(column_index);
8815 					expr.resize(column_index);
8816 				}
8817 			}
8818 
8819 			// Internally, the access chain implementation can also be used on composites;
8820 			// ignore scalar access workarounds in this case.
8821 			StorageClass effective_storage = StorageClassGeneric;
8822 			bool ignore_potential_sliced_writes = false;
8823 			if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
8824 			{
8825 				if (expression_type(base).pointer)
8826 					effective_storage = get_expression_effective_storage_class(base);
8827 
8828 				// Special consideration for control points.
8829 				// Control points can only be written by InvocationID, so there is no need
8830 				// to consider scalar access chains here.
8831 				// Cleans up some cases where it's very painful to determine the accurate storage class
8832 				// since blocks can be partially masked ...
8833 				auto *var = maybe_get_backing_variable(base);
8834 				if (var && var->storage == StorageClassOutput &&
8835 				    get_execution_model() == ExecutionModelTessellationControl &&
8836 				    !has_decoration(var->self, DecorationPatch))
8837 				{
8838 					ignore_potential_sliced_writes = true;
8839 				}
8840 			}
8841 			else
8842 				ignore_potential_sliced_writes = true;
8843 
8844 			if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
8845 			{
8846 				// On some backends, we might not be able to safely access individual scalars in a vector.
8847 				// To work around this, we might have to cast the access chain reference to something which can,
8848 				// like a pointer to scalar, which we can then index into.
8849 				prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
8850 				                                       is_packed);
8851 			}
8852 
8853 			if (is_literal && !is_packed && !row_major_matrix_needs_conversion)
8854 			{
8855 				expr += ".";
8856 				expr += index_to_swizzle(index);
8857 			}
8858 			else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
8859 			{
8860 				auto &c = get<SPIRConstant>(index);
8861 				if (c.specialization)
8862 				{
8863 					// If the index is a spec constant, we cannot turn extract into a swizzle.
8864 					expr += join("[", to_expression(index), "]");
8865 				}
8866 				else
8867 				{
8868 					expr += ".";
8869 					expr += index_to_swizzle(c.scalar());
8870 				}
8871 			}
8872 			else if (is_literal)
8873 			{
8874 				// For packed vectors, we can only access them as an array, not by swizzle.
8875 				expr += join("[", index, "]");
8876 			}
8877 			else
8878 			{
8879 				expr += "[";
8880 				expr += to_expression(index, register_expression_read);
8881 				expr += "]";
8882 			}
8883 
8884 			if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
8885 			{
8886 				prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
8887 				                                       is_packed);
8888 			}
8889 
8890 			expr += deferred_index;
8891 			row_major_matrix_needs_conversion = false;
8892 
8893 			is_packed = false;
8894 			physical_type = 0;
8895 			type_id = type->parent_type;
8896 			type = &get<SPIRType>(type_id);
8897 		}
8898 		else if (!backend.allow_truncated_access_chain)
8899 			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
8900 	}
8901 
8902 	if (pending_array_enclose)
8903 	{
8904 		SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
8905 		                  "but the access chain was terminated in the middle of a multidimensional array. "
8906 		                  "This is not supported.");
8907 	}
8908 
8909 	if (meta)
8910 	{
8911 		meta->need_transpose = row_major_matrix_needs_conversion;
8912 		meta->storage_is_packed = is_packed;
8913 		meta->storage_is_invariant = is_invariant;
8914 		meta->storage_physical_type = physical_type;
8915 	}
8916 
8917 	return expr;
8918 }
8919 
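// Note: intentionally a no-op for plain GLSL. This hook exists so that a backend which cannot
// index individual scalars of a (possibly packed) vector directly can rewrite the access chain
// expression first, as described at the call sites above. What such a rewrite looks like is
// backend-specific; nothing is required here.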
8920 void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
8921 {
8922 }
8923 
8924 string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
8925 {
8926 	auto ret = join(basename, "_", to_member_name(type, index));
8927 	ParsedIR::sanitize_underscores(ret);
8928 	return ret;
8929 }
8930 
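// Dispatcher for the three access-chain flavours below. Rough illustration (hypothetical shader,
// names made up): with buffer block flattening, "uniform UBO { mat4 mvp; vec4 color; };" is
// lowered to "uniform vec4 UBO[5];", so a chain to "color" comes back as roughly "UBO[4]" and
// "mvp" as "mat4(UBO[0], UBO[1], UBO[2], UBO[3])". A flattened struct input instead collapses
// into a flat identifier such as "Input_color". Everything else goes through access_chain_internal.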
8931 string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
8932                                   AccessChainMeta *meta, bool ptr_chain)
8933 {
8934 	if (flattened_buffer_blocks.count(base))
8935 	{
8936 		uint32_t matrix_stride = 0;
8937 		uint32_t array_stride = 0;
8938 		bool need_transpose = false;
8939 		flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
8940 		                              &array_stride, ptr_chain);
8941 
8942 		if (meta)
8943 		{
8944 			meta->need_transpose = target_type.columns > 1 && need_transpose;
8945 			meta->storage_is_packed = false;
8946 		}
8947 
8948 		return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
8949 		                              need_transpose);
8950 	}
8951 	else if (flattened_structs.count(base) && count > 0)
8952 	{
8953 		AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
8954 		if (ptr_chain)
8955 			flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
8956 
8957 		if (flattened_structs[base])
8958 		{
8959 			flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
8960 			if (meta)
8961 				meta->flattened_struct = target_type.basetype == SPIRType::Struct;
8962 		}
8963 
8964 		auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
8965 		if (meta)
8966 		{
8967 			meta->need_transpose = false;
8968 			meta->storage_is_packed = false;
8969 		}
8970 
8971 		auto basename = to_flattened_access_chain_expression(base);
8972 		auto ret = join(basename, "_", chain);
8973 		ParsedIR::sanitize_underscores(ret);
8974 		return ret;
8975 	}
8976 	else
8977 	{
8978 		AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
8979 		if (ptr_chain)
8980 			flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
8981 		return access_chain_internal(base, indices, count, flags, meta);
8982 	}
8983 }
8984 
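// Example of the expression built below (hypothetical names): loading a flattened
// "struct Foo { vec4 a; vec2 b; };" with basename "foo" is reconstructed member by member
// as roughly "Foo(foo_a, foo_b)", recursing into nested struct members.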
8985 string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
8986 {
8987 	auto expr = type_to_glsl_constructor(type);
8988 	expr += '(';
8989 
8990 	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
8991 	{
8992 		if (i)
8993 			expr += ", ";
8994 
8995 		auto &member_type = get<SPIRType>(type.member_types[i]);
8996 		if (member_type.basetype == SPIRType::Struct)
8997 			expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
8998 		else
8999 			expr += to_flattened_struct_member(basename, type, i);
9000 	}
9001 	expr += ')';
9002 	return expr;
9003 }
9004 
9005 std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
9006 {
9007 	// Do not use to_expression as that will unflatten access chains.
9008 	string basename;
9009 	if (const auto *var = maybe_get<SPIRVariable>(id))
9010 		basename = to_name(var->self);
9011 	else if (const auto *expr = maybe_get<SPIRExpression>(id))
9012 		basename = expr->expression;
9013 	else
9014 		basename = to_expression(id);
9015 
9016 	return basename;
9017 }
9018 
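// Example of the statements emitted below (hypothetical names): storing a value "v" into a
// struct flattened under the basename "foo" becomes one assignment per leaf member, roughly
//   foo_a = v.a;
//   foo_b = v.b;
// with nested structs handled by recursing while extending the underscore-separated name.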
9019 void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
9020                                           const SmallVector<uint32_t> &indices)
9021 {
9022 	SmallVector<uint32_t> sub_indices = indices;
9023 	sub_indices.push_back(0);
9024 
9025 	auto *member_type = &type;
9026 	for (auto &index : indices)
9027 		member_type = &get<SPIRType>(member_type->member_types[index]);
9028 
9029 	for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
9030 	{
9031 		sub_indices.back() = i;
9032 		auto lhs = join(basename, "_", to_member_name(*member_type, i));
9033 		ParsedIR::sanitize_underscores(lhs);
9034 
9035 		if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
9036 		{
9037 			store_flattened_struct(lhs, rhs_id, type, sub_indices);
9038 		}
9039 		else
9040 		{
9041 			auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
9042 			statement(lhs, " = ", rhs, ";");
9043 		}
9044 	}
9045 }
9046 
9047 void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
9048 {
9049 	auto &type = expression_type(lhs_id);
9050 	auto basename = to_flattened_access_chain_expression(lhs_id);
9051 	store_flattened_struct(basename, value, type, {});
9052 }
9053 
9054 std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
9055                                                  const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
9056                                                  uint32_t /* array_stride */, bool need_transpose)
9057 {
9058 	if (!target_type.array.empty())
9059 		SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
9060 	else if (target_type.basetype == SPIRType::Struct)
9061 		return flattened_access_chain_struct(base, indices, count, target_type, offset);
9062 	else if (target_type.columns > 1)
9063 		return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
9064 	else
9065 		return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
9066 }
9067 
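// Illustration (hypothetical block layout): a struct member of a flattened buffer block is
// rebuilt through its constructor, pulling each member from the flattened vec4 array at its
// declared offset, e.g. roughly "Light(UBO[2], UBO[3].xyz)" for
// "struct Light { vec4 pos; vec3 dir; };" placed 32 bytes into the block.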
9068 std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
9069                                                         const SPIRType &target_type, uint32_t offset)
9070 {
9071 	std::string expr;
9072 
9073 	expr += type_to_glsl_constructor(target_type);
9074 	expr += "(";
9075 
9076 	for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
9077 	{
9078 		if (i != 0)
9079 			expr += ", ";
9080 
9081 		const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
9082 		uint32_t member_offset = type_struct_member_offset(target_type, i);
9083 
9084 		// The access chain terminates at the struct, so we need to find matrix strides and row-major information
9085 		// ahead of time.
9086 		bool need_transpose = false;
9087 		uint32_t matrix_stride = 0;
9088 		if (member_type.columns > 1)
9089 		{
9090 			need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
9091 			matrix_stride = type_struct_member_matrix_stride(target_type, i);
9092 		}
9093 
9094 		auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
9095 		                                  0 /* array_stride */, need_transpose);
9096 
9097 		// Cannot forward transpositions, so resolve them here.
9098 		if (need_transpose)
9099 			expr += convert_row_major_matrix(tmp, member_type, 0, false);
9100 		else
9101 			expr += tmp;
9102 	}
9103 
9104 	expr += ")";
9105 
9106 	return expr;
9107 }
9108 
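// Illustration (hypothetical block layout): a mat2 with a 16-byte matrix stride at offset 0 of a
// flattened "uniform vec4 UBO[...]" is rebuilt column by column, roughly as
// "mat2(UBO[0].xy, UBO[1].xy)". For a row-major (need_transpose) matrix, vecsize and columns are
// swapped so the data is read back in storage order, and the caller resolves the transpose.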
9109 std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
9110                                                         const SPIRType &target_type, uint32_t offset,
9111                                                         uint32_t matrix_stride, bool need_transpose)
9112 {
9113 	assert(matrix_stride);
9114 	SPIRType tmp_type = target_type;
9115 	if (need_transpose)
9116 		swap(tmp_type.vecsize, tmp_type.columns);
9117 
9118 	std::string expr;
9119 
9120 	expr += type_to_glsl_constructor(tmp_type);
9121 	expr += "(";
9122 
9123 	for (uint32_t i = 0; i < tmp_type.columns; i++)
9124 	{
9125 		if (i != 0)
9126 			expr += ", ";
9127 
9128 		expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
9129 		                                      /* need_transpose= */ false);
9130 	}
9131 
9132 	expr += ")";
9133 
9134 	return expr;
9135 }
9136 
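// Illustration (hypothetical block layout): a vec3 at byte offset 16 of a flattened
// "uniform vec4 UBO[...]" comes back as roughly "UBO[1].xyz"; with a dynamic array index the
// bracket expression becomes something like "UBO[i * 4 + 1].xyz". In the transposed (row-major)
// case, components are gathered one at a time, one matrix_stride apart, and wrapped in a constructor.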
9137 std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
9138                                                         const SPIRType &target_type, uint32_t offset,
9139                                                         uint32_t matrix_stride, bool need_transpose)
9140 {
9141 	auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
9142 
9143 	auto buffer_name = to_name(expression_type(base).self);
9144 
9145 	if (need_transpose)
9146 	{
9147 		std::string expr;
9148 
9149 		if (target_type.vecsize > 1)
9150 		{
9151 			expr += type_to_glsl_constructor(target_type);
9152 			expr += "(";
9153 		}
9154 
9155 		for (uint32_t i = 0; i < target_type.vecsize; ++i)
9156 		{
9157 			if (i != 0)
9158 				expr += ", ";
9159 
9160 			uint32_t component_offset = result.second + i * matrix_stride;
9161 
9162 			assert(component_offset % (target_type.width / 8) == 0);
9163 			uint32_t index = component_offset / (target_type.width / 8);
9164 
9165 			expr += buffer_name;
9166 			expr += "[";
9167 			expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
9168 			expr += convert_to_string(index / 4);
9169 			expr += "]";
9170 
9171 			expr += vector_swizzle(1, index % 4);
9172 		}
9173 
9174 		if (target_type.vecsize > 1)
9175 		{
9176 			expr += ")";
9177 		}
9178 
9179 		return expr;
9180 	}
9181 	else
9182 	{
9183 		assert(result.second % (target_type.width / 8) == 0);
9184 		uint32_t index = result.second / (target_type.width / 8);
9185 
9186 		std::string expr;
9187 
9188 		expr += buffer_name;
9189 		expr += "[";
9190 		expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
9191 		expr += convert_to_string(index / 4);
9192 		expr += "]";
9193 
9194 		expr += vector_swizzle(target_type.vecsize, index % 4);
9195 
9196 		return expr;
9197 	}
9198 }
9199 
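// The pair returned below is (dynamic word-offset expression, constant byte offset).
// Hypothetical example: for a chain like "lights[i].intensity" with a 32-byte array stride and
// the member 16 bytes into the element, the result is roughly { "i * 2 + ", 16 }; the string is
// either empty or ends with "+ ", and the callers turn the byte offset into an index and swizzle.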
9200 std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
9201     const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
9202     bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
9203 {
9204 	// Start traversing type hierarchy at the proper non-pointer types.
9205 	const auto *type = &get_pointee_type(basetype);
9206 
9207 	std::string expr;
9208 
9209 	// Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
9210 	bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
9211 	uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
9212 	uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
9213 
9214 	for (uint32_t i = 0; i < count; i++)
9215 	{
9216 		uint32_t index = indices[i];
9217 
9218 		// Pointers
9219 		if (ptr_chain && i == 0)
9220 		{
9221 			// Here, the pointer type will be decorated with an array stride.
9222 			array_stride = get_decoration(basetype.self, DecorationArrayStride);
9223 			if (!array_stride)
9224 				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
9225 
9226 			auto *constant = maybe_get<SPIRConstant>(index);
9227 			if (constant)
9228 			{
9229 				// Constant array access.
9230 				offset += constant->scalar() * array_stride;
9231 			}
9232 			else
9233 			{
9234 				// Dynamic array access.
9235 				if (array_stride % word_stride)
9236 				{
9237 					SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
9238 					                  "of a 4-component vector. "
9239 					                  "Likely culprit here is a float or vec2 array inside a push "
9240 					                  "constant block which is std430. "
9241 					                  "This cannot be flattened. Try using std140 layout instead.");
9242 				}
9243 
9244 				expr += to_enclosed_expression(index);
9245 				expr += " * ";
9246 				expr += convert_to_string(array_stride / word_stride);
9247 				expr += " + ";
9248 			}
9249 		}
9250 		// Arrays
9251 		else if (!type->array.empty())
9252 		{
9253 			auto *constant = maybe_get<SPIRConstant>(index);
9254 			if (constant)
9255 			{
9256 				// Constant array access.
9257 				offset += constant->scalar() * array_stride;
9258 			}
9259 			else
9260 			{
9261 				// Dynamic array access.
9262 				if (array_stride % word_stride)
9263 				{
9264 					SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
9265 					                  "of a 4-component vector. "
9266 					                  "Likely culprit here is a float or vec2 array inside a push "
9267 					                  "constant block which is std430. "
9268 					                  "This cannot be flattened. Try using std140 layout instead.");
9269 				}
9270 
9271 				expr += to_enclosed_expression(index, false);
9272 				expr += " * ";
9273 				expr += convert_to_string(array_stride / word_stride);
9274 				expr += " + ";
9275 			}
9276 
9277 			uint32_t parent_type = type->parent_type;
9278 			type = &get<SPIRType>(parent_type);
9279 
9280 			if (!type->array.empty())
9281 				array_stride = get_decoration(parent_type, DecorationArrayStride);
9282 		}
9283 		// For structs, the index refers to a constant, which indexes into the members.
9284 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
9285 		else if (type->basetype == SPIRType::Struct)
9286 		{
9287 			index = evaluate_constant_u32(index);
9288 
9289 			if (index >= type->member_types.size())
9290 				SPIRV_CROSS_THROW("Member index is out of bounds!");
9291 
9292 			offset += type_struct_member_offset(*type, index);
9293 
9294 			auto &struct_type = *type;
9295 			type = &get<SPIRType>(type->member_types[index]);
9296 
9297 			if (type->columns > 1)
9298 			{
9299 				matrix_stride = type_struct_member_matrix_stride(struct_type, index);
9300 				row_major_matrix_needs_conversion =
9301 				    combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
9302 			}
9303 			else
9304 				row_major_matrix_needs_conversion = false;
9305 
9306 			if (!type->array.empty())
9307 				array_stride = type_struct_member_array_stride(struct_type, index);
9308 		}
9309 		// Matrix -> Vector
9310 		else if (type->columns > 1)
9311 		{
9312 			auto *constant = maybe_get<SPIRConstant>(index);
9313 			if (constant)
9314 			{
9315 				index = evaluate_constant_u32(index);
9316 				offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
9317 			}
9318 			else
9319 			{
9320 				uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
9321 				// Dynamic array access.
9322 				if (indexing_stride % word_stride)
9323 				{
9324 					SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
9325 					                  "4-component vector. "
9326 					                  "Likely culprit here is a row-major matrix being accessed dynamically. "
9327 					                  "This cannot be flattened. Try using std140 layout instead.");
9328 				}
9329 
9330 				expr += to_enclosed_expression(index, false);
9331 				expr += " * ";
9332 				expr += convert_to_string(indexing_stride / word_stride);
9333 				expr += " + ";
9334 			}
9335 
9336 			type = &get<SPIRType>(type->parent_type);
9337 		}
9338 		// Vector -> Scalar
9339 		else if (type->vecsize > 1)
9340 		{
9341 			auto *constant = maybe_get<SPIRConstant>(index);
9342 			if (constant)
9343 			{
9344 				index = evaluate_constant_u32(index);
9345 				offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
9346 			}
9347 			else
9348 			{
9349 				uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
9350 
9351 				// Dynamic array access.
9352 				if (indexing_stride % word_stride)
9353 				{
9354 					SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
9355 					                  "size of a 4-component vector. "
9356 					                  "This cannot be flattened in legacy targets.");
9357 				}
9358 
9359 				expr += to_enclosed_expression(index, false);
9360 				expr += " * ";
9361 				expr += convert_to_string(indexing_stride / word_stride);
9362 				expr += " + ";
9363 			}
9364 
9365 			type = &get<SPIRType>(type->parent_type);
9366 		}
9367 		else
9368 			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
9369 	}
9370 
9371 	if (need_transpose)
9372 		*need_transpose = row_major_matrix_needs_conversion;
9373 	if (out_matrix_stride)
9374 		*out_matrix_stride = matrix_stride;
9375 	if (out_array_stride)
9376 		*out_array_stride = array_stride;
9377 
9378 	return std::make_pair(expr, offset);
9379 }
9380 
9381 bool CompilerGLSL::should_dereference(uint32_t id)
9382 {
9383 	const auto &type = expression_type(id);
9384 	// Non-pointer expressions don't need to be dereferenced.
9385 	if (!type.pointer)
9386 		return false;
9387 
9388 	// Handles shouldn't be dereferenced either.
9389 	if (!expression_is_lvalue(id))
9390 		return false;
9391 
9392 	// If id is a variable but not a phi variable, we should not dereference it.
9393 	if (auto *var = maybe_get<SPIRVariable>(id))
9394 		return var->phi_variable;
9395 
9396 	// If id is an access chain, we should not dereference it.
9397 	if (auto *expr = maybe_get<SPIRExpression>(id))
9398 		return !expr->access_chain;
9399 
9400 	// Otherwise, we should dereference this pointer expression.
9401 	return true;
9402 }
9403 
9404 bool CompilerGLSL::should_forward(uint32_t id) const
9405 {
9406 	// If id is a variable, we will try to forward it regardless of the force_temporary check below.
9407 	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL
9408 	auto *var = maybe_get<SPIRVariable>(id);
9409 	if (var && var->forwardable)
9410 		return true;
9411 
9412 	// For debugging emit temporary variables for all expressions
9413 	if (options.force_temporary)
9414 		return false;
9415 
9416 	// Immutable expression can always be forwarded.
9417 	if (is_immutable(id))
9418 		return true;
9419 
9420 	return false;
9421 }
9422 
9423 bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
9424 {
9425 	// Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
9426 	return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
9427 }
9428 
9429 void CompilerGLSL::track_expression_read(uint32_t id)
9430 {
9431 	switch (ir.ids[id].get_type())
9432 	{
9433 	case TypeExpression:
9434 	{
9435 		auto &e = get<SPIRExpression>(id);
9436 		for (auto implied_read : e.implied_read_expressions)
9437 			track_expression_read(implied_read);
9438 		break;
9439 	}
9440 
9441 	case TypeAccessChain:
9442 	{
9443 		auto &e = get<SPIRAccessChain>(id);
9444 		for (auto implied_read : e.implied_read_expressions)
9445 			track_expression_read(implied_read);
9446 		break;
9447 	}
9448 
9449 	default:
9450 		break;
9451 	}
9452 
9453 	// If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
9454 	// In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
9455 	if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
9456 	{
9457 		auto &v = expression_usage_counts[id];
9458 		v++;
9459 
9460 		// If we create an expression outside a loop,
9461 		// but access it inside a loop, we're implicitly reading it multiple times.
9462 		// If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
9463 		// working inside the backend compiler.
9464 		if (expression_read_implies_multiple_reads(id))
9465 			v++;
9466 
9467 		if (v >= 2)
9468 		{
9469 			//if (v == 2)
9470 			//    fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
9471 
9472 			forced_temporaries.insert(id);
9473 			// Force a recompile after this pass to avoid forwarding this variable.
9474 			force_recompile();
9475 		}
9476 	}
9477 }
9478 
9479 bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
9480 {
9481 	if (forced_temporaries.find(id) != end(forced_temporaries))
9482 		return false;
9483 
9484 	for (uint32_t i = 0; i < num_args; i++)
9485 		if (!should_forward(args[i]))
9486 			return false;
9487 
9488 	// We need to forward globals as well.
9489 	if (!pure)
9490 	{
9491 		for (auto global : global_variables)
9492 			if (!should_forward(global))
9493 				return false;
9494 		for (auto aliased : aliased_variables)
9495 			if (!should_forward(aliased))
9496 				return false;
9497 	}
9498 
9499 	return true;
9500 }
9501 
9502 void CompilerGLSL::register_impure_function_call()
9503 {
9504 	// Impure functions can modify globals and aliased variables, so invalidate them as well.
9505 	for (auto global : global_variables)
9506 		flush_dependees(get<SPIRVariable>(global));
9507 	for (auto aliased : aliased_variables)
9508 		flush_dependees(get<SPIRVariable>(aliased));
9509 }
9510 
9511 void CompilerGLSL::register_call_out_argument(uint32_t id)
9512 {
9513 	register_write(id);
9514 
9515 	auto *var = maybe_get<SPIRVariable>(id);
9516 	if (var)
9517 		flush_variable_declaration(var->self);
9518 }
9519 
9520 string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
9521 {
9522 	// These variables are always function local,
9523 	// so make sure we emit the variable without storage qualifiers.
9524 	// Some backends will inject custom variables locally in a function
9525 	// with a storage qualifier which is not function-local.
9526 	auto old_storage = var.storage;
9527 	var.storage = StorageClassFunction;
9528 	auto expr = variable_decl(var);
9529 	var.storage = old_storage;
9530 	return expr;
9531 }
9532 
9533 void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
9534 {
9535 	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9536 	if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
9537 	{
9538 		auto &type = get<SPIRType>(var.basetype);
9539 		auto &flags = get_decoration_bitset(var.self);
9540 		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
9541 		flushed_phi_variables.insert(var.self);
9542 	}
9543 }
9544 
9545 void CompilerGLSL::flush_variable_declaration(uint32_t id)
9546 {
9547 	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9548 	auto *var = maybe_get<SPIRVariable>(id);
9549 	if (var && var->deferred_declaration)
9550 	{
9551 		string initializer;
9552 		if (options.force_zero_initialized_variables &&
9553 		    (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
9554 		     var->storage == StorageClassPrivate) &&
9555 		    !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
9556 		{
9557 			initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
9558 		}
9559 
9560 		statement(variable_decl_function_local(*var), initializer, ";");
9561 		var->deferred_declaration = false;
9562 	}
9563 	if (var)
9564 	{
9565 		emit_variable_temporary_copies(*var);
9566 	}
9567 }
9568 
9569 bool CompilerGLSL::remove_duplicate_swizzle(string &op)
9570 {
9571 	auto pos = op.find_last_of('.');
9572 	if (pos == string::npos || pos == 0)
9573 		return false;
9574 
9575 	string final_swiz = op.substr(pos + 1, string::npos);
9576 
9577 	if (backend.swizzle_is_function)
9578 	{
9579 		if (final_swiz.size() < 2)
9580 			return false;
9581 
9582 		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9583 			final_swiz.erase(final_swiz.size() - 2, string::npos);
9584 		else
9585 			return false;
9586 	}
9587 
9588 	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9589 	// If so, and previous swizzle is of same length,
9590 	// we can drop the final swizzle altogether.
9591 	for (uint32_t i = 0; i < final_swiz.size(); i++)
9592 	{
9593 		static const char expected[] = { 'x', 'y', 'z', 'w' };
9594 		if (i >= 4 || final_swiz[i] != expected[i])
9595 			return false;
9596 	}
9597 
9598 	auto prevpos = op.find_last_of('.', pos - 1);
9599 	if (prevpos == string::npos)
9600 		return false;
9601 
9602 	prevpos++;
9603 
9604 	// Make sure there are only swizzles here ...
9605 	for (auto i = prevpos; i < pos; i++)
9606 	{
9607 		if (op[i] < 'w' || op[i] > 'z')
9608 		{
9609 			// If swizzles are foo.xyz() like in C++ backend for example, check for that.
9610 			if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
9611 				break;
9612 			return false;
9613 		}
9614 	}
9615 
9616 	// If original swizzle is large enough, just carve out the components we need.
9617 	// E.g. foobar.wyx.xy will turn into foobar.wy.
9618 	if (pos - prevpos >= final_swiz.size())
9619 	{
9620 		op.erase(prevpos + final_swiz.size(), string::npos);
9621 
9622 		// Add back the function call ...
9623 		if (backend.swizzle_is_function)
9624 			op += "()";
9625 	}
9626 	return true;
9627 }
9628 
9629 // Optimizes away vector swizzles where we have something like
9630 // vec3 foo;
9631 // foo.xyz <-- swizzle expression does nothing.
9632 // This is a very common pattern after OpCompositeConstruct.
9633 bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
9634 {
9635 	auto pos = op.find_last_of('.');
9636 	if (pos == string::npos || pos == 0)
9637 		return false;
9638 
9639 	string final_swiz = op.substr(pos + 1, string::npos);
9640 
9641 	if (backend.swizzle_is_function)
9642 	{
9643 		if (final_swiz.size() < 2)
9644 			return false;
9645 
9646 		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9647 			final_swiz.erase(final_swiz.size() - 2, string::npos);
9648 		else
9649 			return false;
9650 	}
9651 
9652 	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9653 	// If so, and previous swizzle is of same length,
9654 	// we can drop the final swizzle altogether.
9655 	for (uint32_t i = 0; i < final_swiz.size(); i++)
9656 	{
9657 		static const char expected[] = { 'x', 'y', 'z', 'w' };
9658 		if (i >= 4 || final_swiz[i] != expected[i])
9659 			return false;
9660 	}
9661 
9662 	auto &type = expression_type(base);
9663 
9664 	// Sanity checking ...
9665 	assert(type.columns == 1 && type.array.empty());
9666 
9667 	if (type.vecsize == final_swiz.size())
9668 		op.erase(pos, string::npos);
9669 	return true;
9670 }
9671 
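// Illustration of the swizzle merging below (hypothetical IR): building a vec4 from three
// OpCompositeExtracts of "foo" plus a constant would naively give "vec4(foo.x, foo.y, foo.z, 1.0)".
// Merging the per-element swizzles yields "vec4(foo.xyz, 1.0)", and if "foo" is itself a vec3
// the unity swizzle is dropped as well, leaving "vec4(foo, 1.0)".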
9672 string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
9673 {
9674 	ID base = 0;
9675 	string op;
9676 	string subop;
9677 
9678 	// Can only merge swizzles for vectors.
9679 	auto &type = get<SPIRType>(return_type);
9680 	bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
9681 	bool swizzle_optimization = false;
9682 
9683 	for (uint32_t i = 0; i < length; i++)
9684 	{
9685 		auto *e = maybe_get<SPIRExpression>(elems[i]);
9686 
9687 		// If we're merging another scalar which belongs to the same base
9688 		// object, just merge the swizzles so that we trigger as few expression reads as possible!
9689 		if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
9690 		{
9691 			// Only supposed to be used for vector swizzle -> scalar.
9692 			assert(!e->expression.empty() && e->expression.front() == '.');
9693 			subop += e->expression.substr(1, string::npos);
9694 			swizzle_optimization = true;
9695 		}
9696 		else
9697 		{
9698 			// We'll likely end up with duplicated swizzles, e.g.
9699 			// foobar.xyz.xyz from patterns like
9700 			// OpVectorShuffle
9701 			// OpCompositeExtract x 3
9702 			// OpCompositeConstruct 3x + other scalar.
9703 			// Just modify op in-place.
9704 			if (swizzle_optimization)
9705 			{
9706 				if (backend.swizzle_is_function)
9707 					subop += "()";
9708 
9709 				// Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
9710 				// The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
9711 				// We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
9712 				// Essentially, we can only remove one set of swizzles, since that's what we have control over ...
9713 				// Case 1:
9714 				//  foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
9715 				//               foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
9716 				// Case 2:
9717 				//  foo.xyz: Duplicate swizzle won't kick in.
9718 				//           If foo is vec3, we can remove xyz, giving just foo.
9719 				if (!remove_duplicate_swizzle(subop))
9720 					remove_unity_swizzle(base, subop);
9721 
9722 				// Strips away redundant parens if we created them during component extraction.
9723 				strip_enclosed_expression(subop);
9724 				swizzle_optimization = false;
9725 				op += subop;
9726 			}
9727 			else
9728 				op += subop;
9729 
9730 			if (i)
9731 				op += ", ";
9732 
9733 			bool uses_buffer_offset =
9734 			    type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
9735 			subop = to_composite_constructor_expression(elems[i], uses_buffer_offset);
9736 		}
9737 
9738 		base = e ? e->base_expression : ID(0);
9739 	}
9740 
9741 	if (swizzle_optimization)
9742 	{
9743 		if (backend.swizzle_is_function)
9744 			subop += "()";
9745 
9746 		if (!remove_duplicate_swizzle(subop))
9747 			remove_unity_swizzle(base, subop);
9748 		// Strips away redundant parens if we created them during component extraction.
9749 		strip_enclosed_expression(subop);
9750 	}
9751 
9752 	op += subop;
9753 	return op;
9754 }
9755 
9756 bool CompilerGLSL::skip_argument(uint32_t id) const
9757 {
9758 	if (!combined_image_samplers.empty() || !options.vulkan_semantics)
9759 	{
9760 		auto &type = expression_type(id);
9761 		if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
9762 			return true;
9763 	}
9764 	return false;
9765 }
9766 
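// Illustration (hypothetical statements): this rewrites "a = a + b;" into "a += b;" and
// "i = i + 1;" into "i++;". The check is purely textual: it only fires when the right-hand
// side literally begins with the left-hand side followed by a single binary operator.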
9767 bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
9768 {
9769 	// Do this with strings because we have a very clear pattern we can check for and it avoids
9770 	// adding lots of special cases to the code emission.
9771 	if (rhs.size() < lhs.size() + 3)
9772 		return false;
9773 
9774 	// Do not optimize matrices. They are a bit awkward to reason about in general
9775 	// (in which order do the operations happen?), and it does not work on MSL anyway.
9776 	if (type.vecsize > 1 && type.columns > 1)
9777 		return false;
9778 
9779 	auto index = rhs.find(lhs);
9780 	if (index != 0)
9781 		return false;
9782 
9783 	// TODO: Shift operators, but it's not important for now.
9784 	auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
9785 	if (op != lhs.size() + 1)
9786 		return false;
9787 
9788 	// Check that the op is followed by space. This excludes && and ||.
9789 	if (rhs[op + 1] != ' ')
9790 		return false;
9791 
9792 	char bop = rhs[op];
9793 	auto expr = rhs.substr(lhs.size() + 3);
9794 	// Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
9795 	// Find some common patterns which are equivalent.
9796 	if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
9797 		statement(lhs, bop, bop, ";");
9798 	else
9799 		statement(lhs, " ", bop, "= ", expr, ";");
9800 	return true;
9801 }
9802 
9803 void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
9804 {
9805 	if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
9806 		return;
9807 
9808 	assert(current_emitting_block);
9809 	current_emitting_block->invalidate_expressions.push_back(expr);
9810 }
9811 
9812 void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
9813 {
9814 	current_emitting_block = &block;
9815 	for (auto &op : block.ops)
9816 		emit_instruction(op);
9817 	current_emitting_block = nullptr;
9818 }
9819 
9820 void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
9821 {
9822 	// Allow trivially forwarded expressions like OpLoad or trivial shuffles,
9823 	// these will be marked as having suppressed usage tracking.
9824 	// Our only concern is to make sure arithmetic operations are done in similar ways.
9825 	if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
9826 	    forced_invariant_temporaries.count(expr.self) == 0)
9827 	{
9828 		forced_temporaries.insert(expr.self);
9829 		forced_invariant_temporaries.insert(expr.self);
9830 		force_recompile();
9831 
9832 		for (auto &dependent : expr.expression_dependencies)
9833 			disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
9834 	}
9835 }
9836 
9837 void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
9838 {
9839 	// Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
9840 	// this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
9841 	// in one translation unit, but not another, e.g. due to multiple use of an expression.
9842 	// This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
9843 	// expressions to be temporaries.
9844 	// It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
9845 	// for all reasonable uses of invariant.
9846 	if (!has_decoration(store_id, DecorationInvariant))
9847 		return;
9848 
9849 	auto *expr = maybe_get<SPIRExpression>(value_id);
9850 	if (!expr)
9851 		return;
9852 
9853 	disallow_forwarding_in_expression_chain(*expr);
9854 }
9855 
9856 void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
9857 {
9858 	auto rhs = to_pointer_expression(rhs_expression);
9859 
9860 	// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
9861 	if (!rhs.empty())
9862 	{
9863 		handle_store_to_invariant_variable(lhs_expression, rhs_expression);
9864 
9865 		if (!unroll_array_to_complex_store(lhs_expression, rhs_expression))
9866 		{
9867 			auto lhs = to_dereferenced_expression(lhs_expression);
9868 			if (has_decoration(lhs_expression, DecorationNonUniform))
9869 				convert_non_uniform_expression(lhs, lhs_expression);
9870 
9871 			// We might need to cast in order to store to a builtin.
9872 			cast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
9873 
9874 			// Tries to optimize assignments like "<lhs> = <lhs> op expr".
9875 			// While this is purely cosmetic, this is important for legacy ESSL where loop
9876 			// variable increments must be in either i++ or i += const-expr.
9877 			// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
9878 			if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
9879 				statement(lhs, " = ", rhs, ";");
9880 		}
9881 		register_write(lhs_expression);
9882 	}
9883 }
9884 
9885 uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
9886 {
9887 	if (instr.length < 3)
9888 		return 32;
9889 
9890 	auto *ops = stream(instr);
9891 
9892 	switch (instr.op)
9893 	{
9894 	case OpSConvert:
9895 	case OpConvertSToF:
9896 	case OpUConvert:
9897 	case OpConvertUToF:
9898 	case OpIEqual:
9899 	case OpINotEqual:
9900 	case OpSLessThan:
9901 	case OpSLessThanEqual:
9902 	case OpSGreaterThan:
9903 	case OpSGreaterThanEqual:
9904 	case OpULessThan:
9905 	case OpULessThanEqual:
9906 	case OpUGreaterThan:
9907 	case OpUGreaterThanEqual:
9908 		return expression_type(ops[2]).width;
9909 
9910 	default:
9911 	{
9912 		// We can look at result type which is more robust.
9913 		auto *type = maybe_get<SPIRType>(ops[0]);
9914 		if (type && type_is_integral(*type))
9915 			return type->width;
9916 		else
9917 			return 32;
9918 	}
9919 	}
9920 }
9921 
9922 uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
9923 {
9924 	if (length < 1)
9925 		return 32;
9926 
9927 	switch (op)
9928 	{
9929 	case GLSLstd450SAbs:
9930 	case GLSLstd450SSign:
9931 	case GLSLstd450UMin:
9932 	case GLSLstd450SMin:
9933 	case GLSLstd450UMax:
9934 	case GLSLstd450SMax:
9935 	case GLSLstd450UClamp:
9936 	case GLSLstd450SClamp:
9937 	case GLSLstd450FindSMsb:
9938 	case GLSLstd450FindUMsb:
9939 		return expression_type(ops[0]).width;
9940 
9941 	default:
9942 	{
9943 		// We don't need to care about other opcodes, just return 32.
9944 		return 32;
9945 	}
9946 	}
9947 }
9948 
9949 void CompilerGLSL::emit_instruction(const Instruction &instruction)
9950 {
9951 	auto ops = stream(instruction);
9952 	auto opcode = static_cast<Op>(instruction.op);
9953 	uint32_t length = instruction.length;
9954 
9955 #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
9956 #define GLSL_BOP_CAST(op, type) \
9957 	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
9958 #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
9959 #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
9960 #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
9961 #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
9962 #define GLSL_BFOP_CAST(op, type) \
9963 	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
9964 #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
9965 #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
9966 
9967 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
9968 	uint32_t integer_width = get_integer_width_for_instruction(instruction);
9969 	auto int_type = to_signed_basetype(integer_width);
9970 	auto uint_type = to_unsigned_basetype(integer_width);
9971 
9972 	switch (opcode)
9973 	{
9974 	// Dealing with memory
9975 	case OpLoad:
9976 	{
9977 		uint32_t result_type = ops[0];
9978 		uint32_t id = ops[1];
9979 		uint32_t ptr = ops[2];
9980 
9981 		flush_variable_declaration(ptr);
9982 
9983 		// If we're loading from memory that cannot be changed by the shader,
9984 		// just forward the expression directly to avoid needless temporaries.
9985 		// If an expression is mutable and forwardable, we speculate that it is immutable.
9986 		bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
9987 
9988 		// If loading a non-native row-major matrix, mark the expression as need_transpose.
9989 		bool need_transpose = false;
9990 		bool old_need_transpose = false;
9991 
9992 		auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
9993 
9994 		if (forward)
9995 		{
9996 			// If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
9997 			// taking the expression.
9998 			if (ptr_expression && ptr_expression->need_transpose)
9999 			{
10000 				old_need_transpose = true;
10001 				ptr_expression->need_transpose = false;
10002 				need_transpose = true;
10003 			}
10004 			else if (is_non_native_row_major_matrix(ptr))
10005 				need_transpose = true;
10006 		}
10007 
10008 		// If we are forwarding this load,
10009 		// don't register the read to access chain here, defer that to when we actually use the expression,
10010 		// using the add_implied_read_expression mechanism.
10011 		string expr;
10012 
10013 		bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
10014 		bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
10015 		if (forward || (!is_packed && !is_remapped))
10016 		{
10017 			// For the simple case, we do not need to deal with repacking.
10018 			expr = to_dereferenced_expression(ptr, false);
10019 		}
10020 		else
10021 		{
10022 			// If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
10023 			// storing the expression to a temporary.
10024 			expr = to_unpacked_expression(ptr);
10025 		}
10026 
10027 		auto &type = get<SPIRType>(result_type);
10028 		auto &expr_type = expression_type(ptr);
10029 
10030 		// If the expression has more vector components than the result type, insert
10031 		// a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
10032 		// happen with e.g. the MSL backend replacing the type of an input variable.
10033 		if (expr_type.vecsize > type.vecsize)
10034 			expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
10035 
10036 		// We might need to cast in order to load from a builtin.
10037 		cast_from_builtin_load(ptr, expr, type);
10038 
10039 		// We might be trying to load a gl_Position[N], where we should be
10040 		// doing float4[](gl_in[i].gl_Position, ...) instead.
10041 		// Similar workarounds are required for input arrays in tessellation.
10042 		// Also, loading from gl_SampleMask array needs special unroll.
10043 		unroll_array_from_complex_load(id, ptr, expr);
10044 
10045 		if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform))
10046 		{
10047 			// If we're loading something non-opaque, we need to handle non-uniform descriptor access.
10048 			convert_non_uniform_expression(expr, ptr);
10049 		}
10050 
10051 		if (forward && ptr_expression)
10052 			ptr_expression->need_transpose = old_need_transpose;
10053 
10054 		bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
10055 
10056 		if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
10057 			rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
10058 
10059 		// By default, suppress usage tracking since using the same expression multiple times does not imply any extra work.
10060 		// However, if we try to load a complex, composite object from a flattened buffer,
10061 		// we should avoid emitting the same code over and over and lower the result to a temporary.
10062 		bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
10063 
10064 		SPIRExpression *e = nullptr;
10065 		if (!forward && expression_is_non_value_type_array(ptr))
10066 		{
10067 			// Complicated load case where we need to make a copy of ptr, but we cannot, because
10068 			// it is an array, and our backend does not support arrays as value types.
10069 			// Emit the temporary, and copy it explicitly.
10070 			e = &emit_uninitialized_temporary_expression(result_type, id);
10071 			emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
10072 		}
10073 		else
10074 			e = &emit_op(result_type, id, expr, forward, !usage_tracking);
10075 
10076 		e->need_transpose = need_transpose;
10077 		register_read(id, ptr, forward);
10078 
10079 		if (forward)
10080 		{
10081 			// Pass through whether the result is of a packed type and the physical type ID.
10082 			if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
10083 				set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10084 			if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
10085 			{
10086 				set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
10087 				                        get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
10088 			}
10089 		}
10090 		else
10091 		{
10092 			// This might have been set on an earlier compilation iteration, force it to be unset.
10093 			unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10094 			unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
10095 		}
10096 
10097 		inherit_expression_dependencies(id, ptr);
10098 		if (forward)
10099 			add_implied_read_expression(*e, ptr);
10100 		break;
10101 	}
10102 
10103 	case OpInBoundsAccessChain:
10104 	case OpAccessChain:
10105 	case OpPtrAccessChain:
10106 	{
10107 		auto *var = maybe_get<SPIRVariable>(ops[2]);
10108 		if (var)
10109 			flush_variable_declaration(var->self);
10110 
10111 		// If the base is immutable, the access chain pointer must also be.
10112 		// If an expression is mutable and forwardable, we speculate that it is immutable.
10113 		AccessChainMeta meta;
10114 		bool ptr_chain = opcode == OpPtrAccessChain;
10115 		auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain);
10116 
10117 		auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
10118 
10119 		auto *backing_variable = maybe_get_backing_variable(ops[2]);
10120 		expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
10121 		expr.need_transpose = meta.need_transpose;
10122 		expr.access_chain = true;
10123 
10124 		// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
10125 		if (meta.storage_is_packed)
10126 			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
10127 		if (meta.storage_physical_type != 0)
10128 			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
10129 		if (meta.storage_is_invariant)
10130 			set_decoration(ops[1], DecorationInvariant);
10131 		if (meta.flattened_struct)
10132 			flattened_structs[ops[1]] = true;
10133 
10134 		// If we have some expression dependencies in our access chain, this access chain is technically a forwarded
10135 		// temporary which could be subject to invalidation.
10136 		// Need to assume we're forwarded while calling inherit_expression_dependencies.
10137 		forwarded_temporaries.insert(ops[1]);
10138 		// The access chain itself is never forced to a temporary, but its dependencies might.
10139 		suppressed_usage_tracking.insert(ops[1]);
10140 
10141 		for (uint32_t i = 2; i < length; i++)
10142 		{
10143 			inherit_expression_dependencies(ops[1], ops[i]);
10144 			add_implied_read_expression(expr, ops[i]);
10145 		}
10146 
10147 		// If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
10148 		// we're not forwarded after all.
10149 		if (expr.expression_dependencies.empty())
10150 			forwarded_temporaries.erase(ops[1]);
10151 
10152 		break;
10153 	}
10154 
10155 	case OpStore:
10156 	{
10157 		auto *var = maybe_get<SPIRVariable>(ops[0]);
10158 
10159 		if (var && var->statically_assigned)
10160 			var->static_expression = ops[1];
10161 		else if (var && var->loop_variable && !var->loop_variable_enable)
10162 			var->static_expression = ops[1];
10163 		else if (var && var->remapped_variable && var->static_expression)
10164 		{
10165 			// Skip the write.
10166 		}
10167 		else if (flattened_structs.count(ops[0]))
10168 		{
10169 			store_flattened_struct(ops[0], ops[1]);
10170 			register_write(ops[0]);
10171 		}
10172 		else
10173 		{
10174 			emit_store_statement(ops[0], ops[1]);
10175 		}
10176 
10177 		// Storing a pointer results in a variable pointer, so we must conservatively assume
10178 		// we can write through it.
10179 		if (expression_type(ops[1]).pointer)
10180 			register_write(ops[1]);
10181 		break;
10182 	}
10183 
10184 	case OpArrayLength:
10185 	{
10186 		uint32_t result_type = ops[0];
10187 		uint32_t id = ops[1];
10188 		auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
10189 		if (has_decoration(ops[2], DecorationNonUniform))
10190 			convert_non_uniform_expression(e, ops[2]);
10191 		set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
10192 		                    true);
10193 		break;
10194 	}
10195 
10196 	// Function calls
10197 	case OpFunctionCall:
10198 	{
10199 		uint32_t result_type = ops[0];
10200 		uint32_t id = ops[1];
10201 		uint32_t func = ops[2];
10202 		const auto *arg = &ops[3];
10203 		length -= 3;
10204 
10205 		auto &callee = get<SPIRFunction>(func);
10206 		auto &return_type = get<SPIRType>(callee.return_type);
10207 		bool pure = function_is_pure(callee);
10208 
10209 		bool callee_has_out_variables = false;
10210 		bool emit_return_value_as_argument = false;
10211 
10212 		// Invalidate out variables passed to functions since they can be OpStore'd to.
10213 		for (uint32_t i = 0; i < length; i++)
10214 		{
10215 			if (callee.arguments[i].write_count)
10216 			{
10217 				register_call_out_argument(arg[i]);
10218 				callee_has_out_variables = true;
10219 			}
10220 
10221 			flush_variable_declaration(arg[i]);
10222 		}
10223 
10224 		if (!return_type.array.empty() && !backend.can_return_array)
10225 		{
10226 			callee_has_out_variables = true;
10227 			emit_return_value_as_argument = true;
10228 		}
10229 
10230 		if (!pure)
10231 			register_impure_function_call();
10232 
10233 		string funexpr;
10234 		SmallVector<string> arglist;
10235 		funexpr += to_name(func) + "(";
10236 
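		// If the target language cannot return arrays, the callee instead receives an extra
		// out-style parameter: declare the destination temporary up front and pass its name
		// as the first argument.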
10237 		if (emit_return_value_as_argument)
10238 		{
10239 			statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
10240 			arglist.push_back(to_name(id));
10241 		}
10242 
10243 		for (uint32_t i = 0; i < length; i++)
10244 		{
10245 			// Do not pass in separate images or samplers if we're remapping
10246 			// to combined image samplers.
10247 			if (skip_argument(arg[i]))
10248 				continue;
10249 
10250 			arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
10251 		}
10252 
10253 		for (auto &combined : callee.combined_parameters)
10254 		{
10255 			auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
10256 			auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
10257 			arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
10258 		}
10259 
10260 		append_global_func_args(callee, length, arglist);
10261 
10262 		funexpr += merge(arglist);
10263 		funexpr += ")";
10264 
10265 		// Check for function call constraints.
10266 		check_function_call_constraints(arg, length);
10267 
10268 		if (return_type.basetype != SPIRType::Void)
10269 		{
10270 			// If the function actually writes to an out variable,
10271 			// take the conservative route and do not forward.
10272 			// The problem is that we might not read the function
10273 			// result (and emit the function) before an out variable
10274 			// is read (a common case when the return value is ignored!).
10275 			// To avoid having to start tracking invalid variables,
10276 			// just avoid the forwarding problem altogether.
10277 			bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
10278 			               (forced_temporaries.find(id) == end(forced_temporaries));
10279 
10280 			if (emit_return_value_as_argument)
10281 			{
10282 				statement(funexpr, ";");
10283 				set<SPIRExpression>(id, to_name(id), result_type, true);
10284 			}
10285 			else
10286 				emit_op(result_type, id, funexpr, forward);
10287 
10288 			// Function calls are implicit loads from all variables in question.
10289 			// Set dependencies for them.
10290 			for (uint32_t i = 0; i < length; i++)
10291 				register_read(id, arg[i], forward);
10292 
10293 			// If we're going to forward the temporary result,
10294 			// put dependencies on every variable that must not change.
10295 			if (forward)
10296 				register_global_read_dependencies(callee, id);
10297 		}
10298 		else
10299 			statement(funexpr, ";");
10300 
10301 		break;
10302 	}
10303 
10304 	// Composite munging
10305 	case OpCompositeConstruct:
10306 	{
10307 		uint32_t result_type = ops[0];
10308 		uint32_t id = ops[1];
10309 		const auto *const elems = &ops[2];
10310 		length -= 2;
10311 
10312 		bool forward = true;
10313 		for (uint32_t i = 0; i < length; i++)
10314 			forward = forward && should_forward(elems[i]);
10315 
10316 		auto &out_type = get<SPIRType>(result_type);
10317 		auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
10318 
10319 		// Only splat if we have vector constructors.
10320 		// Arrays and structs must be initialized properly in full.
10321 		bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
10322 
10323 		bool splat = false;
10324 		bool swizzle_splat = false;
10325 
10326 		if (in_type)
10327 		{
10328 			splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
10329 			swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
10330 
10331 			if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
10332 			{
10333 				// As a special case, we cannot swizzle literal integers.
10334 				swizzle_splat = false;
10335 			}
10336 		}
10337 
10338 		if (splat || swizzle_splat)
10339 		{
10340 			uint32_t input = elems[0];
10341 			for (uint32_t i = 0; i < length; i++)
10342 			{
10343 				if (input != elems[i])
10344 				{
10345 					splat = false;
10346 					swizzle_splat = false;
10347 				}
10348 			}
10349 		}
10350 
10351 		if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
10352 			forward = false;
10353 		if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
10354 			forward = false;
10355 		if (type_is_empty(out_type) && !backend.supports_empty_struct)
10356 			forward = false;
10357 
10358 		string constructor_op;
10359 		if (backend.use_initializer_list && composite)
10360 		{
10361 			bool needs_trailing_bracket = false;
10362 			// Only use this path if we are building composites.
10363 			// This path cannot be used for arithmetic.
10364 			if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
10365 				constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
10366 			else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
10367 			{
10368 				// MSL path. Array constructor is baked into type here, do not use _constructor variant.
10369 				constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10370 				needs_trailing_bracket = true;
10371 			}
10372 			constructor_op += "{ ";
10373 
10374 			if (type_is_empty(out_type) && !backend.supports_empty_struct)
10375 				constructor_op += "0";
10376 			else if (splat)
10377 				constructor_op += to_unpacked_expression(elems[0]);
10378 			else
10379 				constructor_op += build_composite_combiner(result_type, elems, length);
10380 			constructor_op += " }";
10381 			if (needs_trailing_bracket)
10382 				constructor_op += ")";
10383 		}
10384 		else if (swizzle_splat && !composite)
10385 		{
10386 			constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
10387 		}
10388 		else
10389 		{
10390 			constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10391 			if (type_is_empty(out_type) && !backend.supports_empty_struct)
10392 				constructor_op += "0";
10393 			else if (splat)
10394 				constructor_op += to_unpacked_expression(elems[0]);
10395 			else
10396 				constructor_op += build_composite_combiner(result_type, elems, length);
10397 			constructor_op += ")";
10398 		}
10399 
10400 		if (!constructor_op.empty())
10401 		{
10402 			emit_op(result_type, id, constructor_op, forward);
10403 			for (uint32_t i = 0; i < length; i++)
10404 				inherit_expression_dependencies(id, elems[i]);
10405 		}
10406 		break;
10407 	}
10408 
10409 	case OpVectorInsertDynamic:
10410 	{
10411 		uint32_t result_type = ops[0];
10412 		uint32_t id = ops[1];
10413 		uint32_t vec = ops[2];
10414 		uint32_t comp = ops[3];
10415 		uint32_t index = ops[4];
10416 
10417 		flush_variable_declaration(vec);
10418 
10419 		// Make a copy of the vector, then use an access chain to store into the selected component.
10420 		statement(declare_temporary(result_type, id), to_expression(vec), ";");
10421 		set<SPIRExpression>(id, to_name(id), result_type, true);
10422 		auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
10423 		statement(chain, " = ", to_unpacked_expression(comp), ";");
10424 		break;
10425 	}
10426 
10427 	case OpVectorExtractDynamic:
10428 	{
10429 		uint32_t result_type = ops[0];
10430 		uint32_t id = ops[1];
10431 
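		// Extracting a dynamically indexed component is just an indexed access into the vector,
		// so reuse the access chain machinery with a single, non-literal index.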
10432 		auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
10433 		emit_op(result_type, id, expr, should_forward(ops[2]));
10434 		inherit_expression_dependencies(id, ops[2]);
10435 		inherit_expression_dependencies(id, ops[3]);
10436 		break;
10437 	}
10438 
10439 	case OpCompositeExtract:
10440 	{
10441 		uint32_t result_type = ops[0];
10442 		uint32_t id = ops[1];
10443 		length -= 3;
10444 
10445 		auto &type = get<SPIRType>(result_type);
10446 
10447 		// We can only split the expression here if our expression is forwarded as a temporary.
10448 		bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
10449 
10450 		// Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
10451 		auto &composite_type = expression_type(ops[2]);
10452 		bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
10453 		if (composite_type_is_complex)
10454 			allow_base_expression = false;
10455 
10456 		// Packed expressions or physical ID mapped expressions cannot be split up.
10457 		if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
10458 		    has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
10459 			allow_base_expression = false;
10460 
10461 		// Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
10462 		// into the base expression.
10463 		if (is_non_native_row_major_matrix(ops[2]))
10464 			allow_base_expression = false;
10465 
10466 		AccessChainMeta meta;
10467 		SPIRExpression *e = nullptr;
10468 		auto *c = maybe_get<SPIRConstant>(ops[2]);
10469 
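		// Extracting from a plain (non-specialization) constant composite can be folded straight
		// into a literal expression; no access chain is needed in that case.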
10470 		if (c && !c->specialization && !composite_type_is_complex)
10471 		{
10472 			auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length);
10473 			e = &emit_op(result_type, id, expr, true, true);
10474 		}
10475 		else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
10476 		{
10477 			// Only apply this optimization if result is scalar.
10478 
10479 			// We want to split the access chain from the base.
10480 			// This is so we can later combine different CompositeExtract results
10481 			// with CompositeConstruct without emitting code like
10482 			//
10483 			// vec3 temp = texture(...).xyz
10484 			// vec4(temp.x, temp.y, temp.z, 1.0).
10485 			//
10486 			// when we actually wanted to emit this
10487 			// vec4(texture(...).xyz, 1.0).
10488 			//
10489 			// Including the base will prevent this and would trigger multiple reads
10490 			// from expression causing it to be forced to an actual temporary in GLSL.
10491 			auto expr = access_chain_internal(ops[2], &ops[3], length,
10492 			                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
10493 			                                  ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
10494 			e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
10495 			inherit_expression_dependencies(id, ops[2]);
10496 			e->base_expression = ops[2];
10497 		}
10498 		else
10499 		{
10500 			auto expr = access_chain_internal(ops[2], &ops[3], length,
10501 			                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
10502 			e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
10503 			inherit_expression_dependencies(id, ops[2]);
10504 		}
10505 
10506 		// Pass through some meta information to the loaded expression.
10507 		// We can still end up loading a buffer type to a variable, then CompositeExtract from it
10508 		// instead of loading everything through an access chain.
10509 		e->need_transpose = meta.need_transpose;
10510 		if (meta.storage_is_packed)
10511 			set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10512 		if (meta.storage_physical_type != 0)
10513 			set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
10514 		if (meta.storage_is_invariant)
10515 			set_decoration(id, DecorationInvariant);
10516 
10517 		break;
10518 	}
10519 
10520 	case OpCompositeInsert:
10521 	{
10522 		uint32_t result_type = ops[0];
10523 		uint32_t id = ops[1];
10524 		uint32_t obj = ops[2];
10525 		uint32_t composite = ops[3];
10526 		const auto *elems = &ops[4];
10527 		length -= 4;
10528 
10529 		flush_variable_declaration(composite);
10530 
10531 		// Make a copy of the composite, then use an access chain to store into the selected element.
10532 		statement(declare_temporary(result_type, id), to_expression(composite), ";");
10533 		set<SPIRExpression>(id, to_name(id), result_type, true);
10534 		auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
10535 		statement(chain, " = ", to_unpacked_expression(obj), ";");
10536 
10537 		break;
10538 	}
10539 
10540 	case OpCopyMemory:
10541 	{
10542 		uint32_t lhs = ops[0];
10543 		uint32_t rhs = ops[1];
10544 		if (lhs != rhs)
10545 		{
10546 			uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET];
10547 			if (!tmp_id)
10548 				tmp_id = ir.increase_bound_by(1);
10549 			uint32_t tmp_type_id = expression_type(rhs).parent_type;
10550 
10551 			EmbeddedInstruction fake_load, fake_store;
10552 			fake_load.op = OpLoad;
10553 			fake_load.length = 3;
10554 			fake_load.ops.push_back(tmp_type_id);
10555 			fake_load.ops.push_back(tmp_id);
10556 			fake_load.ops.push_back(rhs);
10557 
10558 			fake_store.op = OpStore;
10559 			fake_store.length = 2;
10560 			fake_store.ops.push_back(lhs);
10561 			fake_store.ops.push_back(tmp_id);
10562 
10563 			// Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible.
10564 			// Synthesize a fake Load and Store pair for CopyMemory.
10565 			emit_instruction(fake_load);
10566 			emit_instruction(fake_store);
10567 		}
10568 		break;
10569 	}
10570 
10571 	case OpCopyLogical:
10572 	{
10573 		// This is used for copying objects of different but logically compatible types, such as arrays and structs.
10574 		// We need to unroll the copy, element-by-element.
10575 		uint32_t result_type = ops[0];
10576 		uint32_t id = ops[1];
10577 		uint32_t rhs = ops[2];
10578 
10579 		emit_uninitialized_temporary_expression(result_type, id);
10580 		emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
10581 		break;
10582 	}
10583 
10584 	case OpCopyObject:
10585 	{
10586 		uint32_t result_type = ops[0];
10587 		uint32_t id = ops[1];
10588 		uint32_t rhs = ops[2];
10589 		bool pointer = get<SPIRType>(result_type).pointer;
10590 
10591 		auto *chain = maybe_get<SPIRAccessChain>(rhs);
10592 		auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
10593 		if (chain)
10594 		{
10595 			// Cannot lower to a SPIRExpression, just copy the object.
10596 			auto &e = set<SPIRAccessChain>(id, *chain);
10597 			e.self = id;
10598 		}
10599 		else if (imgsamp)
10600 		{
10601 			// Cannot lower to a SPIRExpression, just copy the object.
10602 			// GLSL does not currently use this type and will never get here, but MSL does.
10603 			// Handled here instead of CompilerMSL for better integration and general handling,
10604 			// and in case GLSL or other subclasses require it in the future.
10605 			auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
10606 			e.self = id;
10607 		}
10608 		else if (expression_is_lvalue(rhs) && !pointer)
10609 		{
10610 			// Need a copy.
10611 			// For pointer types, we copy the pointer itself.
10612 			statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
10613 			set<SPIRExpression>(id, to_name(id), result_type, true);
10614 		}
10615 		else
10616 		{
10617 			// RHS expression is immutable, so just forward it.
10618 			// Copying these things really makes no sense, but
10619 			// it seems to be allowed anyway.
10620 			auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
10621 			if (pointer)
10622 			{
10623 				auto *var = maybe_get_backing_variable(rhs);
10624 				e.loaded_from = var ? var->self : ID(0);
10625 			}
10626 
10627 			// If we're copying an access chain, need to inherit the read expressions.
10628 			auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
10629 			if (rhs_expr)
10630 			{
10631 				e.implied_read_expressions = rhs_expr->implied_read_expressions;
10632 				e.expression_dependencies = rhs_expr->expression_dependencies;
10633 			}
10634 		}
10635 		break;
10636 	}
10637 
10638 	case OpVectorShuffle:
10639 	{
10640 		uint32_t result_type = ops[0];
10641 		uint32_t id = ops[1];
10642 		uint32_t vec0 = ops[2];
10643 		uint32_t vec1 = ops[3];
10644 		const auto *elems = &ops[4];
10645 		length -= 4;
10646 
10647 		auto &type0 = expression_type(vec0);
10648 
10649 		// If we have the undefined swizzle index -1, we need to swizzle in undefined data,
10650 		// or in our case, T(0).
10651 		bool shuffle = false;
10652 		for (uint32_t i = 0; i < length; i++)
10653 			if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
10654 				shuffle = true;
10655 
10656 		// Cannot use swizzles with packed expressions, force shuffle path.
10657 		if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
10658 			shuffle = true;
10659 
10660 		string expr;
10661 		bool should_fwd, trivial_forward;
10662 
10663 		if (shuffle)
10664 		{
10665 			should_fwd = should_forward(vec0) && should_forward(vec1);
10666 			trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
10667 
10668 			// Constructor style and shuffling from two different vectors.
10669 			SmallVector<string> args;
10670 			for (uint32_t i = 0; i < length; i++)
10671 			{
10672 				if (elems[i] == 0xffffffffu)
10673 				{
10674 					// Use a constant 0 here.
10675 					// We could use the first component or similar, but then we risk propagating
10676 					// a value we might not need, and bogging down codegen.
10677 					SPIRConstant c;
10678 					c.constant_type = type0.parent_type;
10679 					assert(type0.parent_type != ID(0));
10680 					args.push_back(constant_expression(c));
10681 				}
10682 				else if (elems[i] >= type0.vecsize)
10683 					args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
10684 				else
10685 					args.push_back(to_extract_component_expression(vec0, elems[i]));
10686 			}
10687 			expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
10688 		}
10689 		else
10690 		{
10691 			should_fwd = should_forward(vec0);
10692 			trivial_forward = should_suppress_usage_tracking(vec0);
10693 
10694 			// We only source from first vector, so can use swizzle.
10695 			// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
10696 			expr += to_enclosed_unpacked_expression(vec0);
10697 			expr += ".";
10698 			for (uint32_t i = 0; i < length; i++)
10699 			{
10700 				assert(elems[i] != 0xffffffffu);
10701 				expr += index_to_swizzle(elems[i]);
10702 			}
10703 
10704 			if (backend.swizzle_is_function && length > 1)
10705 				expr += "()";
10706 		}
10707 
10708 		// A shuffle is trivial in that it doesn't actually *do* anything.
10709 		// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
10710 
10711 		emit_op(result_type, id, expr, should_fwd, trivial_forward);
10712 
10713 		inherit_expression_dependencies(id, vec0);
10714 		if (vec0 != vec1)
10715 			inherit_expression_dependencies(id, vec1);
10716 		break;
10717 	}
10718 
10719 	// ALU
10720 	case OpIsNan:
10721 		GLSL_UFOP(isnan);
10722 		break;
10723 
10724 	case OpIsInf:
10725 		GLSL_UFOP(isinf);
10726 		break;
10727 
10728 	case OpSNegate:
10729 	case OpFNegate:
10730 		GLSL_UOP(-);
10731 		break;
10732 
10733 	case OpIAdd:
10734 	{
10735 		// For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
10736 		auto type = get<SPIRType>(ops[0]).basetype;
10737 		GLSL_BOP_CAST(+, type);
10738 		break;
10739 	}
10740 
10741 	case OpFAdd:
10742 		GLSL_BOP(+);
10743 		break;
10744 
10745 	case OpISub:
10746 	{
10747 		auto type = get<SPIRType>(ops[0]).basetype;
10748 		GLSL_BOP_CAST(-, type);
10749 		break;
10750 	}
10751 
10752 	case OpFSub:
10753 		GLSL_BOP(-);
10754 		break;
10755 
10756 	case OpIMul:
10757 	{
10758 		auto type = get<SPIRType>(ops[0]).basetype;
10759 		GLSL_BOP_CAST(*, type);
10760 		break;
10761 	}
10762 
10763 	case OpVectorTimesMatrix:
10764 	case OpMatrixTimesVector:
10765 	{
10766 		// If the matrix needs transpose, just flip the multiply order.
10767 		auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
10768 		if (e && e->need_transpose)
10769 		{
10770 			e->need_transpose = false;
10771 			string expr;
10772 
10773 			if (opcode == OpMatrixTimesVector)
10774 				expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
10775 				            enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10776 			else
10777 				expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10778 				            to_enclosed_unpacked_expression(ops[2]));
10779 
10780 			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10781 			emit_op(ops[0], ops[1], expr, forward);
10782 			e->need_transpose = true;
10783 			inherit_expression_dependencies(ops[1], ops[2]);
10784 			inherit_expression_dependencies(ops[1], ops[3]);
10785 		}
10786 		else
10787 			GLSL_BOP(*);
10788 		break;
10789 	}
10790 
10791 	case OpMatrixTimesMatrix:
10792 	{
10793 		auto *a = maybe_get<SPIRExpression>(ops[2]);
10794 		auto *b = maybe_get<SPIRExpression>(ops[3]);
10795 
10796 		// If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
10797 		// a^T * b^T = (b * a)^T.
10798 		if (a && b && a->need_transpose && b->need_transpose)
10799 		{
10800 			a->need_transpose = false;
10801 			b->need_transpose = false;
10802 			auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10803 			                 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10804 			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10805 			auto &e = emit_op(ops[0], ops[1], expr, forward);
10806 			e.need_transpose = true;
10807 			a->need_transpose = true;
10808 			b->need_transpose = true;
10809 			inherit_expression_dependencies(ops[1], ops[2]);
10810 			inherit_expression_dependencies(ops[1], ops[3]);
10811 		}
10812 		else
10813 			GLSL_BOP(*);
10814 
10815 		break;
10816 	}
10817 
10818 	case OpFMul:
10819 	case OpMatrixTimesScalar:
10820 	case OpVectorTimesScalar:
10821 		GLSL_BOP(*);
10822 		break;
10823 
10824 	case OpOuterProduct:
10825 		GLSL_BFOP(outerProduct);
10826 		break;
10827 
10828 	case OpDot:
10829 		GLSL_BFOP(dot);
10830 		break;
10831 
10832 	case OpTranspose:
10833 		if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
10834 		{
10835 			// transpose() is not available, so instead, flip need_transpose,
10836 			// which can later be turned into an emulated transpose op by
10837 			// convert_row_major_matrix(), if necessary.
10838 			uint32_t result_type = ops[0];
10839 			uint32_t result_id = ops[1];
10840 			uint32_t input = ops[2];
10841 
10842 			// Force need_transpose to false temporarily to prevent
10843 			// to_expression() from doing the transpose.
10844 			bool need_transpose = false;
10845 			auto *input_e = maybe_get<SPIRExpression>(input);
10846 			if (input_e)
10847 				swap(need_transpose, input_e->need_transpose);
10848 
10849 			bool forward = should_forward(input);
10850 			auto &e = emit_op(result_type, result_id, to_expression(input), forward);
10851 			e.need_transpose = !need_transpose;
10852 
10853 			// Restore the old need_transpose flag.
10854 			if (input_e)
10855 				input_e->need_transpose = need_transpose;
10856 		}
10857 		else
10858 			GLSL_UFOP(transpose);
10859 		break;
10860 
10861 	case OpSRem:
10862 	{
10863 		uint32_t result_type = ops[0];
10864 		uint32_t result_id = ops[1];
10865 		uint32_t op0 = ops[2];
10866 		uint32_t op1 = ops[3];
10867 
10868 		// Needs special handling.
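		// SPIR-V SRem takes the sign of operand 0 (truncated division), while GLSL's "%" has
		// undefined results for negative operands, so expand to a - b * (a / b) instead.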
10869 		bool forward = should_forward(op0) && should_forward(op1);
10870 		auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
10871 		                 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
10872 
10873 		emit_op(result_type, result_id, expr, forward);
10874 		inherit_expression_dependencies(result_id, op0);
10875 		inherit_expression_dependencies(result_id, op1);
10876 		break;
10877 	}
10878 
10879 	case OpSDiv:
10880 		GLSL_BOP_CAST(/, int_type);
10881 		break;
10882 
10883 	case OpUDiv:
10884 		GLSL_BOP_CAST(/, uint_type);
10885 		break;
10886 
10887 	case OpIAddCarry:
10888 	case OpISubBorrow:
10889 	{
10890 		if (options.es && options.version < 310)
10891 			SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
10892 		else if (!options.es && options.version < 400)
10893 			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
10894 
10895 		uint32_t result_type = ops[0];
10896 		uint32_t result_id = ops[1];
10897 		uint32_t op0 = ops[2];
10898 		uint32_t op1 = ops[3];
10899 		auto &type = get<SPIRType>(result_type);
10900 		emit_uninitialized_temporary_expression(result_type, result_id);
10901 		const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
10902 
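		// uaddCarry()/usubBorrow() return the low 32-bit result and write the carry/borrow bit
		// through the trailing out parameter, so one call fills both members of the result struct.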
10903 		statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
10904 		          to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
10905 		break;
10906 	}
10907 
10908 	case OpUMulExtended:
10909 	case OpSMulExtended:
10910 	{
10911 		if (options.es && options.version < 310)
10912 			SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
10913 		else if (!options.es && options.version < 400)
10914 			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
10915 
10916 		uint32_t result_type = ops[0];
10917 		uint32_t result_id = ops[1];
10918 		uint32_t op0 = ops[2];
10919 		uint32_t op1 = ops[3];
10920 		auto &type = get<SPIRType>(result_type);
10921 		emit_uninitialized_temporary_expression(result_type, result_id);
10922 		const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
10923 
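		// umulExtended()/imulExtended() return void and write the high and low 32 bits through out
		// parameters: member 1 of the result struct receives the high half, member 0 the low half.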
10924 		statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
10925 		          to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
10926 		break;
10927 	}
10928 
10929 	case OpFDiv:
10930 		GLSL_BOP(/);
10931 		break;
10932 
10933 	case OpShiftRightLogical:
10934 		GLSL_BOP_CAST(>>, uint_type);
10935 		break;
10936 
10937 	case OpShiftRightArithmetic:
10938 		GLSL_BOP_CAST(>>, int_type);
10939 		break;
10940 
10941 	case OpShiftLeftLogical:
10942 	{
10943 		auto type = get<SPIRType>(ops[0]).basetype;
10944 		GLSL_BOP_CAST(<<, type);
10945 		break;
10946 	}
10947 
10948 	case OpBitwiseOr:
10949 	{
10950 		auto type = get<SPIRType>(ops[0]).basetype;
10951 		GLSL_BOP_CAST(|, type);
10952 		break;
10953 	}
10954 
10955 	case OpBitwiseXor:
10956 	{
10957 		auto type = get<SPIRType>(ops[0]).basetype;
10958 		GLSL_BOP_CAST(^, type);
10959 		break;
10960 	}
10961 
10962 	case OpBitwiseAnd:
10963 	{
10964 		auto type = get<SPIRType>(ops[0]).basetype;
10965 		GLSL_BOP_CAST(&, type);
10966 		break;
10967 	}
10968 
10969 	case OpNot:
10970 		GLSL_UOP(~);
10971 		break;
10972 
10973 	case OpUMod:
10974 		GLSL_BOP_CAST(%, uint_type);
10975 		break;
10976 
10977 	case OpSMod:
10978 		GLSL_BOP_CAST(%, int_type);
10979 		break;
10980 
10981 	case OpFMod:
10982 		GLSL_BFOP(mod);
10983 		break;
10984 
10985 	case OpFRem:
10986 	{
10987 		if (is_legacy())
10988 			SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
10989 			                  "needed for legacy.");
10990 
10991 		uint32_t result_type = ops[0];
10992 		uint32_t result_id = ops[1];
10993 		uint32_t op0 = ops[2];
10994 		uint32_t op1 = ops[3];
10995 
10996 		// Needs special handling.
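		// FRem keeps the sign of operand 0 (truncation-based), unlike mod() which is floor-based,
		// hence the manual expansion a - b * trunc(a / b).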
10997 		bool forward = should_forward(op0) && should_forward(op1);
10998 		auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
10999 		                 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
11000 
11001 		emit_op(result_type, result_id, expr, forward);
11002 		inherit_expression_dependencies(result_id, op0);
11003 		inherit_expression_dependencies(result_id, op1);
11004 		break;
11005 	}
11006 
11007 	// Relational
11008 	case OpAny:
11009 		GLSL_UFOP(any);
11010 		break;
11011 
11012 	case OpAll:
11013 		GLSL_UFOP(all);
11014 		break;
11015 
11016 	case OpSelect:
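		// SPIR-V OpSelect operands are (condition, true-value, false-value); emit_mix_op takes a
		// mix()-style (false-value, true-value, selector) order, hence the reshuffled arguments.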
11017 		emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
11018 		break;
11019 
11020 	case OpLogicalOr:
11021 	{
11022 		// No vector variant in GLSL for logical OR.
11023 		auto result_type = ops[0];
11024 		auto id = ops[1];
11025 		auto &type = get<SPIRType>(result_type);
11026 
11027 		if (type.vecsize > 1)
11028 			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
11029 		else
11030 			GLSL_BOP(||);
11031 		break;
11032 	}
11033 
11034 	case OpLogicalAnd:
11035 	{
11036 		// No vector variant in GLSL for logical AND.
11037 		auto result_type = ops[0];
11038 		auto id = ops[1];
11039 		auto &type = get<SPIRType>(result_type);
11040 
11041 		if (type.vecsize > 1)
11042 			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
11043 		else
11044 			GLSL_BOP(&&);
11045 		break;
11046 	}
11047 
11048 	case OpLogicalNot:
11049 	{
11050 		auto &type = get<SPIRType>(ops[0]);
11051 		if (type.vecsize > 1)
11052 			GLSL_UFOP(not );
11053 		else
11054 			GLSL_UOP(!);
11055 		break;
11056 	}
11057 
11058 	case OpIEqual:
11059 	{
11060 		if (expression_type(ops[2]).vecsize > 1)
11061 			GLSL_BFOP_CAST(equal, int_type);
11062 		else
11063 			GLSL_BOP_CAST(==, int_type);
11064 		break;
11065 	}
11066 
11067 	case OpLogicalEqual:
11068 	case OpFOrdEqual:
11069 	{
11070 		if (expression_type(ops[2]).vecsize > 1)
11071 			GLSL_BFOP(equal);
11072 		else
11073 			GLSL_BOP(==);
11074 		break;
11075 	}
11076 
11077 	case OpINotEqual:
11078 	{
11079 		if (expression_type(ops[2]).vecsize > 1)
11080 			GLSL_BFOP_CAST(notEqual, int_type);
11081 		else
11082 			GLSL_BOP_CAST(!=, int_type);
11083 		break;
11084 	}
11085 
11086 	case OpLogicalNotEqual:
11087 	case OpFOrdNotEqual:
11088 	{
11089 		if (expression_type(ops[2]).vecsize > 1)
11090 			GLSL_BFOP(notEqual);
11091 		else
11092 			GLSL_BOP(!=);
11093 		break;
11094 	}
11095 
11096 	case OpUGreaterThan:
11097 	case OpSGreaterThan:
11098 	{
11099 		auto type = opcode == OpUGreaterThan ? uint_type : int_type;
11100 		if (expression_type(ops[2]).vecsize > 1)
11101 			GLSL_BFOP_CAST(greaterThan, type);
11102 		else
11103 			GLSL_BOP_CAST(>, type);
11104 		break;
11105 	}
11106 
11107 	case OpFOrdGreaterThan:
11108 	{
11109 		if (expression_type(ops[2]).vecsize > 1)
11110 			GLSL_BFOP(greaterThan);
11111 		else
11112 			GLSL_BOP(>);
11113 		break;
11114 	}
11115 
11116 	case OpUGreaterThanEqual:
11117 	case OpSGreaterThanEqual:
11118 	{
11119 		auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
11120 		if (expression_type(ops[2]).vecsize > 1)
11121 			GLSL_BFOP_CAST(greaterThanEqual, type);
11122 		else
11123 			GLSL_BOP_CAST(>=, type);
11124 		break;
11125 	}
11126 
11127 	case OpFOrdGreaterThanEqual:
11128 	{
11129 		if (expression_type(ops[2]).vecsize > 1)
11130 			GLSL_BFOP(greaterThanEqual);
11131 		else
11132 			GLSL_BOP(>=);
11133 		break;
11134 	}
11135 
11136 	case OpULessThan:
11137 	case OpSLessThan:
11138 	{
11139 		auto type = opcode == OpULessThan ? uint_type : int_type;
11140 		if (expression_type(ops[2]).vecsize > 1)
11141 			GLSL_BFOP_CAST(lessThan, type);
11142 		else
11143 			GLSL_BOP_CAST(<, type);
11144 		break;
11145 	}
11146 
11147 	case OpFOrdLessThan:
11148 	{
11149 		if (expression_type(ops[2]).vecsize > 1)
11150 			GLSL_BFOP(lessThan);
11151 		else
11152 			GLSL_BOP(<);
11153 		break;
11154 	}
11155 
11156 	case OpULessThanEqual:
11157 	case OpSLessThanEqual:
11158 	{
11159 		auto type = opcode == OpULessThanEqual ? uint_type : int_type;
11160 		if (expression_type(ops[2]).vecsize > 1)
11161 			GLSL_BFOP_CAST(lessThanEqual, type);
11162 		else
11163 			GLSL_BOP_CAST(<=, type);
11164 		break;
11165 	}
11166 
11167 	case OpFOrdLessThanEqual:
11168 	{
11169 		if (expression_type(ops[2]).vecsize > 1)
11170 			GLSL_BFOP(lessThanEqual);
11171 		else
11172 			GLSL_BOP(<=);
11173 		break;
11174 	}
11175 
11176 	// Conversion
11177 	case OpSConvert:
11178 	case OpConvertSToF:
11179 	case OpUConvert:
11180 	case OpConvertUToF:
11181 	{
11182 		auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
11183 		uint32_t result_type = ops[0];
11184 		uint32_t id = ops[1];
11185 
11186 		auto &type = get<SPIRType>(result_type);
11187 		auto &arg_type = expression_type(ops[2]);
11188 		auto func = type_to_glsl_constructor(type);
11189 
11190 		if (arg_type.width < type.width || type_is_floating_point(type))
11191 			emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
11192 		else
11193 			emit_unary_func_op(result_type, id, ops[2], func.c_str());
11194 		break;
11195 	}
11196 
11197 	case OpConvertFToU:
11198 	case OpConvertFToS:
11199 	{
11200 		// Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
11201 		uint32_t result_type = ops[0];
11202 		uint32_t id = ops[1];
11203 		auto &type = get<SPIRType>(result_type);
11204 		auto expected_type = type;
11205 		auto &float_type = expression_type(ops[2]);
11206 		expected_type.basetype =
11207 		    opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
11208 
11209 		auto func = type_to_glsl_constructor(expected_type);
11210 		emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
11211 		break;
11212 	}
11213 
11214 	case OpFConvert:
11215 	{
11216 		uint32_t result_type = ops[0];
11217 		uint32_t id = ops[1];
11218 
11219 		auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
11220 		emit_unary_func_op(result_type, id, ops[2], func.c_str());
11221 		break;
11222 	}
11223 
11224 	case OpBitcast:
11225 	{
11226 		uint32_t result_type = ops[0];
11227 		uint32_t id = ops[1];
11228 		uint32_t arg = ops[2];
11229 
11230 		if (!emit_complex_bitcast(result_type, id, arg))
11231 		{
11232 			auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
11233 			emit_unary_func_op(result_type, id, arg, op.c_str());
11234 		}
11235 		break;
11236 	}
11237 
11238 	case OpQuantizeToF16:
11239 	{
11240 		uint32_t result_type = ops[0];
11241 		uint32_t id = ops[1];
11242 		uint32_t arg = ops[2];
11243 
11244 		string op;
11245 		auto &type = get<SPIRType>(result_type);
11246 
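		// There is no direct quantize-to-half operation in GLSL, so round-trip the value through
		// packHalf2x16()/unpackHalf2x16() to drop it to FP16 precision, two components at a time.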
11247 		switch (type.vecsize)
11248 		{
11249 		case 1:
11250 			op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
11251 			break;
11252 		case 2:
11253 			op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
11254 			break;
11255 		case 3:
11256 		{
11257 			auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
11258 			auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
11259 			op = join("vec3(", op0, ", ", op1, ")");
11260 			break;
11261 		}
11262 		case 4:
11263 		{
11264 			auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
11265 			auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
11266 			op = join("vec4(", op0, ", ", op1, ")");
11267 			break;
11268 		}
11269 		default:
11270 			SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
11271 		}
11272 
11273 		emit_op(result_type, id, op, should_forward(arg));
11274 		inherit_expression_dependencies(id, arg);
11275 		break;
11276 	}
11277 
11278 	// Derivatives
11279 	case OpDPdx:
11280 		GLSL_UFOP(dFdx);
11281 		if (is_legacy_es())
11282 			require_extension_internal("GL_OES_standard_derivatives");
11283 		register_control_dependent_expression(ops[1]);
11284 		break;
11285 
11286 	case OpDPdy:
11287 		GLSL_UFOP(dFdy);
11288 		if (is_legacy_es())
11289 			require_extension_internal("GL_OES_standard_derivatives");
11290 		register_control_dependent_expression(ops[1]);
11291 		break;
11292 
11293 	case OpDPdxFine:
11294 		GLSL_UFOP(dFdxFine);
11295 		if (options.es)
11296 		{
11297 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11298 		}
11299 		if (options.version < 450)
11300 			require_extension_internal("GL_ARB_derivative_control");
11301 		register_control_dependent_expression(ops[1]);
11302 		break;
11303 
11304 	case OpDPdyFine:
11305 		GLSL_UFOP(dFdyFine);
11306 		if (options.es)
11307 		{
11308 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11309 		}
11310 		if (options.version < 450)
11311 			require_extension_internal("GL_ARB_derivative_control");
11312 		register_control_dependent_expression(ops[1]);
11313 		break;
11314 
11315 	case OpDPdxCoarse:
11316 		if (options.es)
11317 		{
11318 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11319 		}
11320 		GLSL_UFOP(dFdxCoarse);
11321 		if (options.version < 450)
11322 			require_extension_internal("GL_ARB_derivative_control");
11323 		register_control_dependent_expression(ops[1]);
11324 		break;
11325 
11326 	case OpDPdyCoarse:
11327 		GLSL_UFOP(dFdyCoarse);
11328 		if (options.es)
11329 		{
11330 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11331 		}
11332 		if (options.version < 450)
11333 			require_extension_internal("GL_ARB_derivative_control");
11334 		register_control_dependent_expression(ops[1]);
11335 		break;
11336 
11337 	case OpFwidth:
11338 		GLSL_UFOP(fwidth);
11339 		if (is_legacy_es())
11340 			require_extension_internal("GL_OES_standard_derivatives");
11341 		register_control_dependent_expression(ops[1]);
11342 		break;
11343 
11344 	case OpFwidthCoarse:
11345 		GLSL_UFOP(fwidthCoarse);
11346 		if (options.es)
11347 		{
11348 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11349 		}
11350 		if (options.version < 450)
11351 			require_extension_internal("GL_ARB_derivative_control");
11352 		register_control_dependent_expression(ops[1]);
11353 		break;
11354 
11355 	case OpFwidthFine:
11356 		GLSL_UFOP(fwidthFine);
11357 		if (options.es)
11358 		{
11359 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11360 		}
11361 		if (options.version < 450)
11362 			require_extension_internal("GL_ARB_derivative_control");
11363 		register_control_dependent_expression(ops[1]);
11364 		break;
11365 
11366 	// Bitfield
11367 	case OpBitFieldInsert:
11368 	{
11369 		emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
11370 		break;
11371 	}
11372 
11373 	case OpBitFieldSExtract:
11374 	{
11375 		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
11376 		                                SPIRType::Int, SPIRType::Int);
11377 		break;
11378 	}
11379 
11380 	case OpBitFieldUExtract:
11381 	{
11382 		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
11383 		                                SPIRType::Int, SPIRType::Int);
11384 		break;
11385 	}
11386 
11387 	case OpBitReverse:
11388 		// BitReverse does not have issues with sign since result type must match input type.
11389 		GLSL_UFOP(bitfieldReverse);
11390 		break;
11391 
11392 	case OpBitCount:
11393 	{
11394 		auto basetype = expression_type(ops[2]).basetype;
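		// bitCount() always returns a signed int vector, so the result is bitcast back if the
		// declared result type is unsigned.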
11395 		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
11396 		break;
11397 	}
11398 
11399 	// Atomics
11400 	case OpAtomicExchange:
11401 	{
11402 		uint32_t result_type = ops[0];
11403 		uint32_t id = ops[1];
11404 		uint32_t ptr = ops[2];
11405 		// Ignore semantics for now, probably only relevant to CL.
11406 		uint32_t val = ops[5];
11407 		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11408 
11409 		emit_atomic_func_op(result_type, id, ptr, val, op);
11410 		break;
11411 	}
11412 
11413 	case OpAtomicCompareExchange:
11414 	{
11415 		uint32_t result_type = ops[0];
11416 		uint32_t id = ops[1];
11417 		uint32_t ptr = ops[2];
11418 		uint32_t val = ops[6];
11419 		uint32_t comp = ops[7];
11420 		const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
11421 
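		// GLSL's atomicCompSwap()/imageAtomicCompSwap() take (compare, data), whereas SPIR-V orders
		// the operands as (value, comparator), so the comparator is passed before the value here.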
11422 		emit_atomic_func_op(result_type, id, ptr, comp, val, op);
11423 		break;
11424 	}
11425 
11426 	case OpAtomicLoad:
11427 	{
11428 		// In plain GLSL, we have no atomic loads, so emulate this by atomically adding 0 and hoping the compiler figures it out.
11429 		// Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
11430 		auto &type = expression_type(ops[2]);
11431 		forced_temporaries.insert(ops[1]);
11432 		bool atomic_image = check_atomic_image(ops[2]);
11433 		bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11434 		                     (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11435 		const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11436 		const char *increment = unsigned_type ? "0u" : "0";
11437 		emit_op(ops[0], ops[1],
11438 		        join(op, "(",
11439 		             to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
11440 		flush_all_atomic_capable_variables();
11441 		break;
11442 	}
11443 
11444 	case OpAtomicStore:
11445 	{
11446 		// In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
11447 		// Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
11448 		uint32_t ptr = ops[0];
11449 		// Ignore semantics for now, probably only relevant to CL.
11450 		uint32_t val = ops[3];
11451 		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11452 		statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");");
11453 		flush_all_atomic_capable_variables();
11454 		break;
11455 	}
11456 
11457 	case OpAtomicIIncrement:
11458 	case OpAtomicIDecrement:
11459 	{
11460 		forced_temporaries.insert(ops[1]);
11461 		auto &type = expression_type(ops[2]);
11462 		if (type.storage == StorageClassAtomicCounter)
11463 		{
11464 			// Legacy GLSL stuff, not sure if this is relevant to support.
11465 			if (opcode == OpAtomicIIncrement)
11466 				GLSL_UFOP(atomicCounterIncrement);
11467 			else
11468 				GLSL_UFOP(atomicCounterDecrement);
11469 		}
11470 		else
11471 		{
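			// Buffer and image atomics have no dedicated increment/decrement, so emulate them with
			// atomicAdd() of +1 or -1; uint(-1) wraps around and decrements an unsigned value.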
11472 			bool atomic_image = check_atomic_image(ops[2]);
11473 			bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11474 			                     (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11475 			const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11476 
11477 			const char *increment = nullptr;
11478 			if (opcode == OpAtomicIIncrement && unsigned_type)
11479 				increment = "1u";
11480 			else if (opcode == OpAtomicIIncrement)
11481 				increment = "1";
11482 			else if (unsigned_type)
11483 				increment = "uint(-1)";
11484 			else
11485 				increment = "-1";
11486 
11487 			emit_op(ops[0], ops[1],
11488 			        join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
11489 		}
11490 
11491 		flush_all_atomic_capable_variables();
11492 		break;
11493 	}
11494 
11495 	case OpAtomicIAdd:
11496 	{
11497 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11498 		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11499 		break;
11500 	}
11501 
11502 	case OpAtomicISub:
11503 	{
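		// There is no atomicSub() in GLSL, so negate the operand and reuse atomicAdd()/imageAtomicAdd().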
11504 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11505 		forced_temporaries.insert(ops[1]);
11506 		auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
11507 		emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
11508 		flush_all_atomic_capable_variables();
11509 		break;
11510 	}
11511 
11512 	case OpAtomicSMin:
11513 	case OpAtomicUMin:
11514 	{
11515 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
11516 		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11517 		break;
11518 	}
11519 
11520 	case OpAtomicSMax:
11521 	case OpAtomicUMax:
11522 	{
11523 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
11524 		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11525 		break;
11526 	}
11527 
11528 	case OpAtomicAnd:
11529 	{
11530 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
11531 		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11532 		break;
11533 	}
11534 
11535 	case OpAtomicOr:
11536 	{
11537 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
11538 		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11539 		break;
11540 	}
11541 
11542 	case OpAtomicXor:
11543 	{
11544 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
11545 		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11546 		break;
11547 	}
11548 
11549 	// Geometry shaders
11550 	case OpEmitVertex:
11551 		statement("EmitVertex();");
11552 		break;
11553 
11554 	case OpEndPrimitive:
11555 		statement("EndPrimitive();");
11556 		break;
11557 
11558 	case OpEmitStreamVertex:
11559 	{
11560 		if (options.es)
11561 			SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11562 		else if (!options.es && options.version < 400)
11563 			SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11564 
11565 		auto stream_expr = to_expression(ops[0]);
11566 		if (expression_type(ops[0]).basetype != SPIRType::Int)
11567 			stream_expr = join("int(", stream_expr, ")");
11568 		statement("EmitStreamVertex(", stream_expr, ");");
11569 		break;
11570 	}
11571 
11572 	case OpEndStreamPrimitive:
11573 	{
11574 		if (options.es)
11575 			SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11576 		else if (!options.es && options.version < 400)
11577 			SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11578 
11579 		auto stream_expr = to_expression(ops[0]);
11580 		if (expression_type(ops[0]).basetype != SPIRType::Int)
11581 			stream_expr = join("int(", stream_expr, ")");
11582 		statement("EndStreamPrimitive(", stream_expr, ");");
11583 		break;
11584 	}
11585 
11586 	// Textures
11587 	case OpImageSampleExplicitLod:
11588 	case OpImageSampleProjExplicitLod:
11589 	case OpImageSampleDrefExplicitLod:
11590 	case OpImageSampleProjDrefExplicitLod:
11591 	case OpImageSampleImplicitLod:
11592 	case OpImageSampleProjImplicitLod:
11593 	case OpImageSampleDrefImplicitLod:
11594 	case OpImageSampleProjDrefImplicitLod:
11595 	case OpImageFetch:
11596 	case OpImageGather:
11597 	case OpImageDrefGather:
11598 		// Gets a bit hairy, so move this to a separate function.
11599 		emit_texture_op(instruction, false);
11600 		break;
11601 
11602 	case OpImageSparseSampleExplicitLod:
11603 	case OpImageSparseSampleProjExplicitLod:
11604 	case OpImageSparseSampleDrefExplicitLod:
11605 	case OpImageSparseSampleProjDrefExplicitLod:
11606 	case OpImageSparseSampleImplicitLod:
11607 	case OpImageSparseSampleProjImplicitLod:
11608 	case OpImageSparseSampleDrefImplicitLod:
11609 	case OpImageSparseSampleProjDrefImplicitLod:
11610 	case OpImageSparseFetch:
11611 	case OpImageSparseGather:
11612 	case OpImageSparseDrefGather:
11613 		// Gets a bit hairy, so move this to a separate function.
11614 		emit_texture_op(instruction, true);
11615 		break;
11616 
11617 	case OpImageSparseTexelsResident:
11618 		if (options.es)
11619 			SPIRV_CROSS_THROW("Sparse feedback is not supported in OpenGL ES.");
11620 		require_extension_internal("GL_ARB_sparse_texture2");
11621 		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
11622 		break;
11623 
11624 	case OpImage:
11625 	{
11626 		uint32_t result_type = ops[0];
11627 		uint32_t id = ops[1];
11628 
11629 		// Suppress usage tracking.
11630 		auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
11631 
11632 		// When using the image, we need to know which variable it is actually loaded from.
11633 		auto *var = maybe_get_backing_variable(ops[2]);
11634 		e.loaded_from = var ? var->self : ID(0);
11635 		break;
11636 	}
11637 
11638 	case OpImageQueryLod:
11639 	{
11640 		const char *op = nullptr;
11641 		if (!options.es && options.version < 400)
11642 		{
11643 			require_extension_internal("GL_ARB_texture_query_lod");
11644 			// For some reason, the ARB spec is all-caps.
11645 			op = "textureQueryLOD";
11646 		}
11647 		else if (options.es)
11648 			SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
11649 		else
11650 			op = "textureQueryLod";
11651 
11652 		auto sampler_expr = to_expression(ops[2]);
11653 		if (has_decoration(ops[2], DecorationNonUniform))
11654 		{
11655 			if (maybe_get_backing_variable(ops[2]))
11656 				convert_non_uniform_expression(sampler_expr, ops[2]);
11657 			else if (*backend.nonuniform_qualifier != '\0')
11658 				sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")");
11659 		}
11660 
11661 		bool forward = should_forward(ops[3]);
11662 		emit_op(ops[0], ops[1],
11663 		        join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"),
11664 		        forward);
11665 		inherit_expression_dependencies(ops[1], ops[2]);
11666 		inherit_expression_dependencies(ops[1], ops[3]);
11667 		register_control_dependent_expression(ops[1]);
11668 		break;
11669 	}
11670 
11671 	case OpImageQueryLevels:
11672 	{
11673 		uint32_t result_type = ops[0];
11674 		uint32_t id = ops[1];
11675 
11676 		if (!options.es && options.version < 430)
11677 			require_extension_internal("GL_ARB_texture_query_levels");
11678 		if (options.es)
11679 			SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
11680 
11681 		auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
11682 		auto &restype = get<SPIRType>(ops[0]);
11683 		expr = bitcast_expression(restype, SPIRType::Int, expr);
11684 		emit_op(result_type, id, expr, true);
11685 		break;
11686 	}
11687 
11688 	case OpImageQuerySamples:
11689 	{
11690 		auto &type = expression_type(ops[2]);
11691 		uint32_t result_type = ops[0];
11692 		uint32_t id = ops[1];
11693 
11694 		string expr;
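		// Sampled == 2 marks a storage image, which uses imageSamples(); otherwise query the
		// underlying texture with textureSamples().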
11695 		if (type.image.sampled == 2)
11696 			expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")");
11697 		else
11698 			expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
11699 
11700 		auto &restype = get<SPIRType>(ops[0]);
11701 		expr = bitcast_expression(restype, SPIRType::Int, expr);
11702 		emit_op(result_type, id, expr, true);
11703 		break;
11704 	}
11705 
11706 	case OpSampledImage:
11707 	{
11708 		uint32_t result_type = ops[0];
11709 		uint32_t id = ops[1];
11710 		emit_sampled_image_op(result_type, id, ops[2], ops[3]);
11711 		inherit_expression_dependencies(id, ops[2]);
11712 		inherit_expression_dependencies(id, ops[3]);
11713 		break;
11714 	}
11715 
11716 	case OpImageQuerySizeLod:
11717 	{
11718 		uint32_t result_type = ops[0];
11719 		uint32_t id = ops[1];
11720 		uint32_t img = ops[2];
11721 
11722 		std::string fname = "textureSize";
11723 		if (is_legacy_desktop())
11724 		{
11725 			auto &type = expression_type(img);
11726 			auto &imgtype = get<SPIRType>(type.self);
11727 			fname = legacy_tex_op(fname, imgtype, img);
11728 		}
11729 		else if (is_legacy_es())
11730 			SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
11731 
11732 		auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
11733 		                 bitcast_expression(SPIRType::Int, ops[3]), ")");
11734 		auto &restype = get<SPIRType>(ops[0]);
11735 		expr = bitcast_expression(restype, SPIRType::Int, expr);
11736 		emit_op(result_type, id, expr, true);
11737 		break;
11738 	}
11739 
11740 	// Image load/store
11741 	case OpImageRead:
11742 	case OpImageSparseRead:
11743 	{
11744 		// We added NonReadable speculatively to the OpImage variable due to glslangValidator
11745 		// not adding the proper qualifiers.
11746 		// If it turns out we need to read the image after all, remove the qualifier and recompile.
11747 		auto *var = maybe_get_backing_variable(ops[2]);
11748 		if (var)
11749 		{
11750 			auto &flags = ir.meta[var->self].decoration.decoration_flags;
11751 			if (flags.get(DecorationNonReadable))
11752 			{
11753 				flags.clear(DecorationNonReadable);
11754 				force_recompile();
11755 			}
11756 		}
11757 
11758 		uint32_t result_type = ops[0];
11759 		uint32_t id = ops[1];
11760 
11761 		bool pure;
11762 		string imgexpr;
11763 		auto &type = expression_type(ops[2]);
11764 
11765 		if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
11766 		{
11767 			if (type.image.ms)
11768 				SPIRV_CROSS_THROW("Trying to remap a multisampled image to a variable; this is not possible.");
11769 
11770 			auto itr =
11771 			    find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
11772 
11773 			if (itr == end(pls_inputs))
11774 			{
11775 				// For non-PLS inputs, we rely on subpass type remapping information to get it right
11776 				// since ImageRead always returns 4-component vectors and the backing type is opaque.
11777 				if (!var->remapped_components)
11778 					SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
11779 				imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
11780 			}
11781 			else
11782 			{
11783 				// A PLS input could have a different number of components than what the SPIR-V expects, so swizzle to
11784 				// the appropriate vector size.
11785 				uint32_t components = pls_format_to_components(itr->format);
11786 				imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
11787 			}
11788 			pure = true;
11789 		}
11790 		else if (type.image.dim == DimSubpassData)
11791 		{
11792 			if (var && subpass_input_is_framebuffer_fetch(var->self))
11793 			{
11794 				imgexpr = to_expression(var->self);
11795 			}
11796 			else if (options.vulkan_semantics)
11797 			{
11798 				// With Vulkan semantics, use the proper Vulkan GLSL construct.
11799 				if (type.image.ms)
11800 				{
11801 					uint32_t operands = ops[4];
11802 					if (operands != ImageOperandsSampleMask || length != 6)
11803 						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11804 						                  "operand mask was used.");
11805 
11806 					uint32_t samples = ops[5];
11807 					imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")");
11808 				}
11809 				else
11810 					imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")");
11811 			}
11812 			else
11813 			{
11814 				if (type.image.ms)
11815 				{
11816 					uint32_t operands = ops[4];
11817 					if (operands != ImageOperandsSampleMask || length != 6)
11818 						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11819 						                  "operand mask was used.");
11820 
11821 					uint32_t samples = ops[5];
11822 					imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
11823 					               to_expression(samples), ")");
11824 				}
11825 				else
11826 				{
11827 					// Implement subpass loads via texture barrier style sampling.
11828 					imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
11829 				}
11830 			}
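			// A minimal sketch of the emitted GLSL, assuming a subpass input named uSub (hypothetical name):
			// the Vulkan path above yields roughly "subpassLoad(uSub)" (plus a sample index for MS inputs),
			// while the fallback yields "texelFetch(uSub, ivec2(gl_FragCoord.xy), 0)";
			// both are then swizzled below to match the result type's component count.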
11831 			imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
11832 			pure = true;
11833 		}
11834 		else
11835 		{
11836 			bool sparse = opcode == OpImageSparseRead;
11837 			uint32_t sparse_code_id = 0;
11838 			uint32_t sparse_texel_id = 0;
11839 			if (sparse)
11840 				emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
11841 
11842 			// imageLoad only accepts int coords, not uint.
11843 			auto coord_expr = to_expression(ops[3]);
11844 			auto target_coord_type = expression_type(ops[3]);
11845 			target_coord_type.basetype = SPIRType::Int;
11846 			coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
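			// E.g. a uvec2 coordinate uCoord (hypothetical name) would become roughly "ivec2(uCoord)" here,
			// since the bitcast to a signed coordinate is what imageLoad() expects.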
11847 
11848 			// Plain image load/store.
11849 			if (sparse)
11850 			{
11851 				if (type.image.ms)
11852 				{
11853 					uint32_t operands = ops[4];
11854 					if (operands != ImageOperandsSampleMask || length != 6)
11855 						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11856 						                  "operand mask was used.");
11857 
11858 					uint32_t samples = ops[5];
11859 					statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
11860 					          coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
11861 				}
11862 				else
11863 				{
11864 					statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
11865 					          coord_expr, ", ", to_expression(sparse_texel_id), ");");
11866 				}
11867 				imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
11868 				               to_expression(sparse_texel_id), ")");
11869 			}
11870 			else
11871 			{
11872 				if (type.image.ms)
11873 				{
11874 					uint32_t operands = ops[4];
11875 					if (operands != ImageOperandsSampleMask || length != 6)
11876 						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11877 						                  "operand mask was used.");
11878 
11879 					uint32_t samples = ops[5];
11880 					imgexpr =
11881 					    join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
11882 				}
11883 				else
11884 					imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")");
11885 			}
11886 
11887 			if (!sparse)
11888 				imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
11889 			pure = false;
11890 		}
11891 
11892 		if (var && var->forwardable)
11893 		{
11894 			bool forward = forced_temporaries.find(id) == end(forced_temporaries);
11895 			auto &e = emit_op(result_type, id, imgexpr, forward);
11896 
11897 			// We only need to track dependencies if we're reading from image load/store.
11898 			if (!pure)
11899 			{
11900 				e.loaded_from = var->self;
11901 				if (forward)
11902 					var->dependees.push_back(id);
11903 			}
11904 		}
11905 		else
11906 			emit_op(result_type, id, imgexpr, false);
11907 
11908 		inherit_expression_dependencies(id, ops[2]);
11909 		if (type.image.ms)
11910 			inherit_expression_dependencies(id, ops[5]);
11911 		break;
11912 	}
11913 
11914 	case OpImageTexelPointer:
11915 	{
11916 		uint32_t result_type = ops[0];
11917 		uint32_t id = ops[1];
11918 
11919 		auto coord_expr = to_expression(ops[3]);
11920 		auto target_coord_type = expression_type(ops[3]);
11921 		target_coord_type.basetype = SPIRType::Int;
11922 		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
11923 
11924 		auto expr = join(to_expression(ops[2]), ", ", coord_expr);
11925 		auto &e = set<SPIRExpression>(id, expr, result_type, true);
11926 
11927 		// When using the pointer, we need to know which variable it is actually loaded from.
11928 		auto *var = maybe_get_backing_variable(ops[2]);
11929 		e.loaded_from = var ? var->self : ID(0);
11930 		inherit_expression_dependencies(id, ops[3]);
11931 		break;
11932 	}
11933 
11934 	case OpImageWrite:
11935 	{
11936 		// We added Nonwritable speculatively to the OpImage variable due to glslangValidator
11937 		// not adding the proper qualifiers.
11938 		// If it turns out we need to write to the image after all, remove the qualifier and recompile.
11939 		auto *var = maybe_get_backing_variable(ops[0]);
11940 		if (var)
11941 		{
11942 			auto &flags = ir.meta[var->self].decoration.decoration_flags;
11943 			if (flags.get(DecorationNonWritable))
11944 			{
11945 				flags.clear(DecorationNonWritable);
11946 				force_recompile();
11947 			}
11948 		}
11949 
11950 		auto &type = expression_type(ops[0]);
11951 		auto &value_type = expression_type(ops[2]);
11952 		auto store_type = value_type;
11953 		store_type.vecsize = 4;
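		// Sketch of the resulting store, assuming an image uImg and a vec2 value v (hypothetical names):
		// remap_swizzle() below pads the value to 4 components, so this emits roughly
		// "imageStore(uImg, ivec2(coord), v.xyyy);".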
11954 
11955 		// imageStore only accepts int coords, not uint.
11956 		auto coord_expr = to_expression(ops[1]);
11957 		auto target_coord_type = expression_type(ops[1]);
11958 		target_coord_type.basetype = SPIRType::Int;
11959 		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
11960 
11961 		if (type.image.ms)
11962 		{
11963 			uint32_t operands = ops[3];
11964 			if (operands != ImageOperandsSampleMask || length != 5)
11965 				SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
11966 			uint32_t samples = ops[4];
11967 			statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
11968 			          remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
11969 		}
11970 		else
11971 			statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ",
11972 			          remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
11973 
11974 		if (var && variable_storage_is_aliased(*var))
11975 			flush_all_aliased_variables();
11976 		break;
11977 	}
11978 
11979 	case OpImageQuerySize:
11980 	{
11981 		auto &type = expression_type(ops[2]);
11982 		uint32_t result_type = ops[0];
11983 		uint32_t id = ops[1];
11984 
11985 		if (type.basetype == SPIRType::Image)
11986 		{
11987 			string expr;
11988 			if (type.image.sampled == 2)
11989 			{
11990 				if (!options.es && options.version < 430)
11991 					require_extension_internal("GL_ARB_shader_image_size");
11992 				else if (options.es && options.version < 310)
11993 					SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
11994 
11995 				// The size of an image is always constant.
11996 				expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")");
11997 			}
11998 			else
11999 			{
12000 				// This path is hit for samplerBuffers and multisampled images which do not have LOD.
12001 				std::string fname = "textureSize";
12002 				if (is_legacy())
12003 				{
12004 					auto &imgtype = get<SPIRType>(type.self);
12005 					fname = legacy_tex_op(fname, imgtype, ops[2]);
12006 				}
12007 				expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
12008 			}
12009 
12010 			auto &restype = get<SPIRType>(ops[0]);
12011 			expr = bitcast_expression(restype, SPIRType::Int, expr);
12012 			emit_op(result_type, id, expr, true);
12013 		}
12014 		else
12015 			SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
12016 		break;
12017 	}
12018 
12019 	// Compute
12020 	case OpControlBarrier:
12021 	case OpMemoryBarrier:
12022 	{
12023 		uint32_t execution_scope = 0;
12024 		uint32_t memory;
12025 		uint32_t semantics;
12026 
12027 		if (opcode == OpMemoryBarrier)
12028 		{
12029 			memory = evaluate_constant_u32(ops[0]);
12030 			semantics = evaluate_constant_u32(ops[1]);
12031 		}
12032 		else
12033 		{
12034 			execution_scope = evaluate_constant_u32(ops[0]);
12035 			memory = evaluate_constant_u32(ops[1]);
12036 			semantics = evaluate_constant_u32(ops[2]);
12037 		}
12038 
12039 		if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
12040 		{
12041 			// OpControlBarrier with ScopeSubgroup is subgroupBarrier()
12042 			if (opcode != OpControlBarrier)
12043 			{
12044 				request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
12045 			}
12046 			else
12047 			{
12048 				request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
12049 			}
12050 		}
12051 
12052 		if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
12053 		{
12054 			// Tessellation control shaders only have barrier(), and barrier() implies memory barriers.
12055 			if (opcode == OpControlBarrier)
12056 				statement("barrier();");
12057 			break;
12058 		}
12059 
12060 		// We only care about these flags, acquire/release and friends are not relevant to GLSL.
12061 		semantics = mask_relevant_memory_semantics(semantics);
12062 
12063 		if (opcode == OpMemoryBarrier)
12064 		{
12065 			// If we are a memory barrier and the next instruction is a control barrier, check whether that control
12066 			// barrier's memory semantics already cover what we need, so we can skip the redundant memory barrier.
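			// E.g. the SPIR-V sequence OpMemoryBarrier(Workgroup) followed by OpControlBarrier(Workgroup, Workgroup)
			// then collapses to a single "barrier();" instead of "memoryBarrierShared(); barrier();".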
12067 			const Instruction *next = get_next_instruction_in_block(instruction);
12068 			if (next && next->op == OpControlBarrier)
12069 			{
12070 				auto *next_ops = stream(*next);
12071 				uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
12072 				uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
12073 				next_semantics = mask_relevant_memory_semantics(next_semantics);
12074 
12075 				bool memory_scope_covered = false;
12076 				if (next_memory == memory)
12077 					memory_scope_covered = true;
12078 				else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
12079 				{
12080 					// If we only care about workgroup memory, either Device or Workgroup scope is fine,
12081 					// scope does not have to match.
12082 					if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
12083 					    (memory == ScopeDevice || memory == ScopeWorkgroup))
12084 					{
12085 						memory_scope_covered = true;
12086 					}
12087 				}
12088 				else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
12089 				{
12090 					// The control barrier has device scope, but the memory barrier just has workgroup scope.
12091 					memory_scope_covered = true;
12092 				}
12093 
12094 				// If we have the same memory scope, and all memory types are covered, we're good.
12095 				if (memory_scope_covered && (semantics & next_semantics) == semantics)
12096 					break;
12097 			}
12098 		}
12099 
12100 		// We are synchronizing some memory or syncing execution,
12101 		// so we cannot forward any loads beyond the memory barrier.
12102 		if (semantics || opcode == OpControlBarrier)
12103 		{
12104 			assert(current_emitting_block);
12105 			flush_control_dependent_expressions(current_emitting_block->self);
12106 			flush_all_active_variables();
12107 		}
12108 
12109 		if (memory == ScopeWorkgroup) // Only need to consider memory within a group
12110 		{
12111 			if (semantics == MemorySemanticsWorkgroupMemoryMask)
12112 			{
12113 				// OpControlBarrier implies a memory barrier for shared memory as well.
12114 				bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
12115 				if (!implies_shared_barrier)
12116 					statement("memoryBarrierShared();");
12117 			}
12118 			else if (semantics != 0)
12119 				statement("groupMemoryBarrier();");
12120 		}
12121 		else if (memory == ScopeSubgroup)
12122 		{
12123 			const uint32_t all_barriers =
12124 			    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
12125 
12126 			if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
12127 			{
12128 				// These are not relevant for GLSL, but assume they mean a full subgroupMemoryBarrier().
12129 				// subgroupMemoryBarrier() covers everything, so no need to test anything else.
12130 				statement("subgroupMemoryBarrier();");
12131 			}
12132 			else if ((semantics & all_barriers) == all_barriers)
12133 			{
12134 				// Short-hand instead of emitting 3 barriers.
12135 				statement("subgroupMemoryBarrier();");
12136 			}
12137 			else
12138 			{
12139 				// Pick out individual barriers.
12140 				if (semantics & MemorySemanticsWorkgroupMemoryMask)
12141 					statement("subgroupMemoryBarrierShared();");
12142 				if (semantics & MemorySemanticsUniformMemoryMask)
12143 					statement("subgroupMemoryBarrierBuffer();");
12144 				if (semantics & MemorySemanticsImageMemoryMask)
12145 					statement("subgroupMemoryBarrierImage();");
12146 			}
12147 		}
12148 		else
12149 		{
12150 			const uint32_t all_barriers =
12151 			    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
12152 
12153 			if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
12154 			{
12155 				// These are not relevant for GLSL, but assume it means memoryBarrier().
12156 				// memoryBarrier() does everything, so no need to test anything else.
12157 				statement("memoryBarrier();");
12158 			}
12159 			else if ((semantics & all_barriers) == all_barriers)
12160 			{
12161 				// Short-hand instead of emitting 3 barriers.
12162 				statement("memoryBarrier();");
12163 			}
12164 			else
12165 			{
12166 				// Pick out individual barriers.
12167 				if (semantics & MemorySemanticsWorkgroupMemoryMask)
12168 					statement("memoryBarrierShared();");
12169 				if (semantics & MemorySemanticsUniformMemoryMask)
12170 					statement("memoryBarrierBuffer();");
12171 				if (semantics & MemorySemanticsImageMemoryMask)
12172 					statement("memoryBarrierImage();");
12173 			}
12174 		}
12175 
12176 		if (opcode == OpControlBarrier)
12177 		{
12178 			if (execution_scope == ScopeSubgroup)
12179 				statement("subgroupBarrier();");
12180 			else
12181 				statement("barrier();");
12182 		}
12183 		break;
12184 	}
12185 
12186 	case OpExtInst:
12187 	{
12188 		uint32_t extension_set = ops[2];
12189 
12190 		if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
12191 		{
12192 			emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12193 		}
12194 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
12195 		{
12196 			emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12197 		}
12198 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
12199 		{
12200 			emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12201 		}
12202 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
12203 		{
12204 			emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12205 		}
12206 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
12207 		{
12208 			emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12209 		}
12210 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
12211 		{
12212 			break; // Ignore SPIR-V debug information extended instructions.
12213 		}
12214 		else
12215 		{
12216 			statement("// unimplemented ext op ", instruction.op);
12217 			break;
12218 		}
12219 
12220 		break;
12221 	}
12222 
12223 	// Legacy sub-group stuff ...
12224 	case OpSubgroupBallotKHR:
12225 	{
12226 		uint32_t result_type = ops[0];
12227 		uint32_t id = ops[1];
12228 		string expr;
12229 		expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
12230 		emit_op(result_type, id, expr, should_forward(ops[2]));
12231 
12232 		require_extension_internal("GL_ARB_shader_ballot");
12233 		inherit_expression_dependencies(id, ops[2]);
12234 		register_control_dependent_expression(ops[1]);
12235 		break;
12236 	}
12237 
12238 	case OpSubgroupFirstInvocationKHR:
12239 	{
12240 		uint32_t result_type = ops[0];
12241 		uint32_t id = ops[1];
12242 		emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
12243 
12244 		require_extension_internal("GL_ARB_shader_ballot");
12245 		register_control_dependent_expression(ops[1]);
12246 		break;
12247 	}
12248 
12249 	case OpSubgroupReadInvocationKHR:
12250 	{
12251 		uint32_t result_type = ops[0];
12252 		uint32_t id = ops[1];
12253 		emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
12254 
12255 		require_extension_internal("GL_ARB_shader_ballot");
12256 		register_control_dependent_expression(ops[1]);
12257 		break;
12258 	}
12259 
12260 	case OpSubgroupAllKHR:
12261 	{
12262 		uint32_t result_type = ops[0];
12263 		uint32_t id = ops[1];
12264 		emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
12265 
12266 		require_extension_internal("GL_ARB_shader_group_vote");
12267 		register_control_dependent_expression(ops[1]);
12268 		break;
12269 	}
12270 
12271 	case OpSubgroupAnyKHR:
12272 	{
12273 		uint32_t result_type = ops[0];
12274 		uint32_t id = ops[1];
12275 		emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
12276 
12277 		require_extension_internal("GL_ARB_shader_group_vote");
12278 		register_control_dependent_expression(ops[1]);
12279 		break;
12280 	}
12281 
12282 	case OpSubgroupAllEqualKHR:
12283 	{
12284 		uint32_t result_type = ops[0];
12285 		uint32_t id = ops[1];
12286 		emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
12287 
12288 		require_extension_internal("GL_ARB_shader_group_vote");
12289 		register_control_dependent_expression(ops[1]);
12290 		break;
12291 	}
12292 
12293 	case OpGroupIAddNonUniformAMD:
12294 	case OpGroupFAddNonUniformAMD:
12295 	{
12296 		uint32_t result_type = ops[0];
12297 		uint32_t id = ops[1];
12298 		emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
12299 
12300 		require_extension_internal("GL_AMD_shader_ballot");
12301 		register_control_dependent_expression(ops[1]);
12302 		break;
12303 	}
12304 
12305 	case OpGroupFMinNonUniformAMD:
12306 	case OpGroupUMinNonUniformAMD:
12307 	case OpGroupSMinNonUniformAMD:
12308 	{
12309 		uint32_t result_type = ops[0];
12310 		uint32_t id = ops[1];
12311 		emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
12312 
12313 		require_extension_internal("GL_AMD_shader_ballot");
12314 		register_control_dependent_expression(ops[1]);
12315 		break;
12316 	}
12317 
12318 	case OpGroupFMaxNonUniformAMD:
12319 	case OpGroupUMaxNonUniformAMD:
12320 	case OpGroupSMaxNonUniformAMD:
12321 	{
12322 		uint32_t result_type = ops[0];
12323 		uint32_t id = ops[1];
12324 		emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
12325 
12326 		require_extension_internal("GL_AMD_shader_ballot");
12327 		register_control_dependent_expression(ops[1]);
12328 		break;
12329 	}
12330 
12331 	case OpFragmentMaskFetchAMD:
12332 	{
12333 		auto &type = expression_type(ops[2]);
12334 		uint32_t result_type = ops[0];
12335 		uint32_t id = ops[1];
12336 
12337 		if (type.image.dim == spv::DimSubpassData)
12338 		{
12339 			emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
12340 		}
12341 		else
12342 		{
12343 			emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
12344 		}
12345 
12346 		require_extension_internal("GL_AMD_shader_fragment_mask");
12347 		break;
12348 	}
12349 
12350 	case OpFragmentFetchAMD:
12351 	{
12352 		auto &type = expression_type(ops[2]);
12353 		uint32_t result_type = ops[0];
12354 		uint32_t id = ops[1];
12355 
12356 		if (type.image.dim == spv::DimSubpassData)
12357 		{
12358 			emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
12359 		}
12360 		else
12361 		{
12362 			emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
12363 		}
12364 
12365 		require_extension_internal("GL_AMD_shader_fragment_mask");
12366 		break;
12367 	}
12368 
12369 	// Vulkan 1.1 sub-group stuff ...
12370 	case OpGroupNonUniformElect:
12371 	case OpGroupNonUniformBroadcast:
12372 	case OpGroupNonUniformBroadcastFirst:
12373 	case OpGroupNonUniformBallot:
12374 	case OpGroupNonUniformInverseBallot:
12375 	case OpGroupNonUniformBallotBitExtract:
12376 	case OpGroupNonUniformBallotBitCount:
12377 	case OpGroupNonUniformBallotFindLSB:
12378 	case OpGroupNonUniformBallotFindMSB:
12379 	case OpGroupNonUniformShuffle:
12380 	case OpGroupNonUniformShuffleXor:
12381 	case OpGroupNonUniformShuffleUp:
12382 	case OpGroupNonUniformShuffleDown:
12383 	case OpGroupNonUniformAll:
12384 	case OpGroupNonUniformAny:
12385 	case OpGroupNonUniformAllEqual:
12386 	case OpGroupNonUniformFAdd:
12387 	case OpGroupNonUniformIAdd:
12388 	case OpGroupNonUniformFMul:
12389 	case OpGroupNonUniformIMul:
12390 	case OpGroupNonUniformFMin:
12391 	case OpGroupNonUniformFMax:
12392 	case OpGroupNonUniformSMin:
12393 	case OpGroupNonUniformSMax:
12394 	case OpGroupNonUniformUMin:
12395 	case OpGroupNonUniformUMax:
12396 	case OpGroupNonUniformBitwiseAnd:
12397 	case OpGroupNonUniformBitwiseOr:
12398 	case OpGroupNonUniformBitwiseXor:
12399 	case OpGroupNonUniformLogicalAnd:
12400 	case OpGroupNonUniformLogicalOr:
12401 	case OpGroupNonUniformLogicalXor:
12402 	case OpGroupNonUniformQuadSwap:
12403 	case OpGroupNonUniformQuadBroadcast:
12404 		emit_subgroup_op(instruction);
12405 		break;
12406 
12407 	case OpFUnordEqual:
12408 	case OpFUnordNotEqual:
12409 	case OpFUnordLessThan:
12410 	case OpFUnordGreaterThan:
12411 	case OpFUnordLessThanEqual:
12412 	case OpFUnordGreaterThanEqual:
12413 	{
12414 		// GLSL doesn't specify if floating point comparisons are ordered or unordered,
12415 		// but glslang always emits ordered floating point compares for GLSL.
12416 		// To get unordered compares, we can test the opposite thing and invert the result.
12417 		// This way, we force true when there is any NaN present.
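		// E.g. OpFUnordLessThan a, b becomes "!(a >= b)" for scalars and "not(greaterThanEqual(a, b))"
		// for vectors, which evaluates to true whenever either operand is NaN.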
12418 		uint32_t op0 = ops[2];
12419 		uint32_t op1 = ops[3];
12420 
12421 		string expr;
12422 		if (expression_type(op0).vecsize > 1)
12423 		{
12424 			const char *comp_op = nullptr;
12425 			switch (opcode)
12426 			{
12427 			case OpFUnordEqual:
12428 				comp_op = "notEqual";
12429 				break;
12430 
12431 			case OpFUnordNotEqual:
12432 				comp_op = "equal";
12433 				break;
12434 
12435 			case OpFUnordLessThan:
12436 				comp_op = "greaterThanEqual";
12437 				break;
12438 
12439 			case OpFUnordLessThanEqual:
12440 				comp_op = "greaterThan";
12441 				break;
12442 
12443 			case OpFUnordGreaterThan:
12444 				comp_op = "lessThanEqual";
12445 				break;
12446 
12447 			case OpFUnordGreaterThanEqual:
12448 				comp_op = "lessThan";
12449 				break;
12450 
12451 			default:
12452 				assert(0);
12453 				break;
12454 			}
12455 
12456 			expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
12457 		}
12458 		else
12459 		{
12460 			const char *comp_op = nullptr;
12461 			switch (opcode)
12462 			{
12463 			case OpFUnordEqual:
12464 				comp_op = " != ";
12465 				break;
12466 
12467 			case OpFUnordNotEqual:
12468 				comp_op = " == ";
12469 				break;
12470 
12471 			case OpFUnordLessThan:
12472 				comp_op = " >= ";
12473 				break;
12474 
12475 			case OpFUnordLessThanEqual:
12476 				comp_op = " > ";
12477 				break;
12478 
12479 			case OpFUnordGreaterThan:
12480 				comp_op = " <= ";
12481 				break;
12482 
12483 			case OpFUnordGreaterThanEqual:
12484 				comp_op = " < ";
12485 				break;
12486 
12487 			default:
12488 				assert(0);
12489 				break;
12490 			}
12491 
12492 			expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
12493 		}
12494 
12495 		emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
12496 		inherit_expression_dependencies(ops[1], op0);
12497 		inherit_expression_dependencies(ops[1], op1);
12498 		break;
12499 	}
12500 
12501 	case OpReportIntersectionKHR:
12502 		// NV is the same opcode.
12503 		forced_temporaries.insert(ops[1]);
12504 		if (ray_tracing_is_khr)
12505 			GLSL_BFOP(reportIntersectionEXT);
12506 		else
12507 			GLSL_BFOP(reportIntersectionNV);
12508 		flush_control_dependent_expressions(current_emitting_block->self);
12509 		break;
12510 	case OpIgnoreIntersectionNV:
12511 		// KHR variant is a terminator.
12512 		statement("ignoreIntersectionNV();");
12513 		flush_control_dependent_expressions(current_emitting_block->self);
12514 		break;
12515 	case OpTerminateRayNV:
12516 		// KHR variant is a terminator.
12517 		statement("terminateRayNV();");
12518 		flush_control_dependent_expressions(current_emitting_block->self);
12519 		break;
12520 	case OpTraceNV:
12521 		statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12522 		          to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12523 		          to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12524 		          to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
12525 		flush_control_dependent_expressions(current_emitting_block->self);
12526 		break;
12527 	case OpTraceRayKHR:
12528 		if (!has_decoration(ops[10], DecorationLocation))
12529 			SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
12530 		statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12531 		          to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12532 		          to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12533 		          to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
12534 		flush_control_dependent_expressions(current_emitting_block->self);
12535 		break;
12536 	case OpExecuteCallableNV:
12537 		statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
12538 		flush_control_dependent_expressions(current_emitting_block->self);
12539 		break;
12540 	case OpExecuteCallableKHR:
12541 		if (!has_decoration(ops[1], DecorationLocation))
12542 			SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
12543 		statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
12544 		flush_control_dependent_expressions(current_emitting_block->self);
12545 		break;
12546 
12547 		// Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
12548 	case OpRayQueryInitializeKHR:
12549 		flush_variable_declaration(ops[0]);
12550 		statement("rayQueryInitializeEXT(",
12551 		          to_expression(ops[0]), ", ", to_expression(ops[1]), ", ",
12552 		          to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
12553 		          to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12554 		          to_expression(ops[6]), ", ", to_expression(ops[7]), ");");
12555 		break;
12556 	case OpRayQueryProceedKHR:
12557 		flush_variable_declaration(ops[0]);
12558 		emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false);
12559 		break;
12560 	case OpRayQueryTerminateKHR:
12561 		flush_variable_declaration(ops[0]);
12562 		statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");");
12563 		break;
12564 	case OpRayQueryGenerateIntersectionKHR:
12565 		flush_variable_declaration(ops[0]);
12566 		statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
12567 		break;
12568 	case OpRayQueryConfirmIntersectionKHR:
12569 		flush_variable_declaration(ops[0]);
12570 		statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");");
12571 		break;
12572 #define GLSL_RAY_QUERY_GET_OP(op) \
12573 	case OpRayQueryGet##op##KHR: \
12574 		flush_variable_declaration(ops[2]); \
12575 		emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \
12576 		break
12577 #define GLSL_RAY_QUERY_GET_OP2(op) \
12578 	case OpRayQueryGet##op##KHR: \
12579 		flush_variable_declaration(ops[2]); \
12580 		emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \
12581 		break
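	// For example, GLSL_RAY_QUERY_GET_OP(RayTMin) expands to a case that emits roughly
	// "rayQueryGetRayTMinEXT(<query>)", and the _OP2 variant appends a ", bool(...)" committed argument.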
12582 	GLSL_RAY_QUERY_GET_OP(RayTMin);
12583 	GLSL_RAY_QUERY_GET_OP(RayFlags);
12584 	GLSL_RAY_QUERY_GET_OP(WorldRayOrigin);
12585 	GLSL_RAY_QUERY_GET_OP(WorldRayDirection);
12586 	GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque);
12587 	GLSL_RAY_QUERY_GET_OP2(IntersectionType);
12588 	GLSL_RAY_QUERY_GET_OP2(IntersectionT);
12589 	GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex);
12590 	GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId);
12591 	GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset);
12592 	GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex);
12593 	GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex);
12594 	GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics);
12595 	GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace);
12596 	GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection);
12597 	GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin);
12598 	GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld);
12599 	GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject);
12600 #undef GLSL_RAY_QUERY_GET_OP
12601 #undef GLSL_RAY_QUERY_GET_OP2
12602 
12603 	case OpConvertUToAccelerationStructureKHR:
12604 		require_extension_internal("GL_EXT_ray_tracing");
12605 		GLSL_UFOP(accelerationStructureEXT);
12606 		break;
12607 
12608 	case OpConvertUToPtr:
12609 	{
12610 		auto &type = get<SPIRType>(ops[0]);
12611 		if (type.storage != StorageClassPhysicalStorageBufferEXT)
12612 			SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
12613 
12614 		auto op = type_to_glsl(type);
12615 		emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12616 		break;
12617 	}
12618 
12619 	case OpConvertPtrToU:
12620 	{
12621 		auto &type = get<SPIRType>(ops[0]);
12622 		auto &ptr_type = expression_type(ops[2]);
12623 		if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
12624 			SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
12625 
12626 		auto op = type_to_glsl(type);
12627 		emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12628 		break;
12629 	}
12630 
12631 	case OpUndef:
12632 		// Undefined value has been declared.
12633 		break;
12634 
12635 	case OpLine:
12636 	{
12637 		emit_line_directive(ops[0], ops[1]);
12638 		break;
12639 	}
12640 
12641 	case OpNoLine:
12642 		break;
12643 
12644 	case OpDemoteToHelperInvocationEXT:
12645 		if (!options.vulkan_semantics)
12646 			SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12647 		require_extension_internal("GL_EXT_demote_to_helper_invocation");
12648 		statement(backend.demote_literal, ";");
12649 		break;
12650 
12651 	case OpIsHelperInvocationEXT:
12652 		if (!options.vulkan_semantics)
12653 			SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12654 		require_extension_internal("GL_EXT_demote_to_helper_invocation");
12655 		emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
12656 		break;
12657 
12658 	case OpBeginInvocationInterlockEXT:
12659 		// If the interlock is complex, we emit this elsewhere.
12660 		if (!interlocked_is_complex)
12661 		{
12662 			if (options.es)
12663 				statement("beginInvocationInterlockNV();");
12664 			else
12665 				statement("beginInvocationInterlockARB();");
12666 
12667 			flush_all_active_variables();
12668 			// Make sure forwarding doesn't propagate outside interlock region.
12669 		}
12670 		break;
12671 
12672 	case OpEndInvocationInterlockEXT:
12673 		// If the interlock is complex, we emit this elsewhere.
12674 		if (!interlocked_is_complex)
12675 		{
12676 			if (options.es)
12677 				statement("endInvocationInterlockNV();");
12678 			else
12679 				statement("endInvocationInterlockARB();");
12680 
12681 			flush_all_active_variables();
12682 			// Make sure forwarding doesn't propagate outside interlock region.
12683 		}
12684 		break;
12685 
12686 	default:
12687 		statement("// unimplemented op ", instruction.op);
12688 		break;
12689 	}
12690 }
12691 
12692 // Appends function arguments, mapped from global variables, beyond the specified arg index.
12693 // This is used when a function call uses fewer arguments than the function defines.
12694 // This situation may occur if the function signature has been dynamically modified to
12695 // extract global variables referenced from within the function, and convert them to
12696 // function arguments. This is necessary for shader languages that do not support global
12697 // access to shader input content from within a function (e.g. Metal). Each additional
12698 // function arg uses the name of the global variable. Function nesting will modify the
12699 // functions and function calls all the way up the nesting chain.
12700 void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
12701 {
12702 	auto &args = func.arguments;
12703 	uint32_t arg_cnt = uint32_t(args.size());
12704 	for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
12705 	{
12706 		auto &arg = args[arg_idx];
12707 		assert(arg.alias_global_variable);
12708 
12709 		// If the underlying variable needs to be declared
12710 		// (ie. a local variable with deferred declaration), do so now.
12711 		uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
12712 		if (var_id)
12713 			flush_variable_declaration(var_id);
12714 
12715 		arglist.push_back(to_func_call_arg(arg, arg.id));
12716 	}
12717 }
12718 
12719 string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
12720 {
12721 	if (type.type_alias != TypeID(0) &&
12722 	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
12723 	{
12724 		return to_member_name(get<SPIRType>(type.type_alias), index);
12725 	}
12726 
12727 	auto &memb = ir.meta[type.self].members;
12728 	if (index < memb.size() && !memb[index].alias.empty())
12729 		return memb[index].alias;
12730 	else
12731 		return join("_m", index);
12732 }
12733 
12734 string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
12735 {
12736 	return join(".", to_member_name(type, index));
12737 }
12738 
12739 string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
12740 {
12741 	string ret;
12742 	auto *member_type = &type;
12743 	for (auto &index : indices)
12744 	{
12745 		ret += join(".", to_member_name(*member_type, index));
12746 		member_type = &get<SPIRType>(member_type->member_types[index]);
12747 	}
12748 	return ret;
12749 }
12750 
12751 void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
12752 {
12753 	auto &memb = ir.meta[type.self].members;
12754 	if (index < memb.size() && !memb[index].alias.empty())
12755 	{
12756 		auto &name = memb[index].alias;
12757 		if (name.empty())
12758 			return;
12759 
12760 		ParsedIR::sanitize_identifier(name, true, true);
12761 		update_name_cache(type.member_name_cache, name);
12762 	}
12763 }
12764 
12765 // Checks whether the ID is a row_major matrix that requires conversion before use
12766 bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
12767 {
12768 	// Natively supported row-major matrices do not need to be converted.
12769 	// Legacy targets do not support row major.
12770 	if (backend.native_row_major_matrix && !is_legacy())
12771 		return false;
12772 
12773 	auto *e = maybe_get<SPIRExpression>(id);
12774 	if (e)
12775 		return e->need_transpose;
12776 	else
12777 		return has_decoration(id, DecorationRowMajor);
12778 }
12779 
12780 // Checks whether the member is a row_major matrix that requires conversion before use
12781 bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
12782 {
12783 	// Natively supported row-major matrices do not need to be converted.
12784 	if (backend.native_row_major_matrix && !is_legacy())
12785 		return false;
12786 
12787 	// Non-matrix or column-major matrix types do not need to be converted.
12788 	if (!has_member_decoration(type.self, index, DecorationRowMajor))
12789 		return false;
12790 
12791 	// Only square row-major matrices can be converted at this time.
12792 	// Converting non-square matrices will require defining custom GLSL function that
12793 	// swaps matrix elements while retaining the original dimensional form of the matrix.
12794 	const auto mbr_type = get<SPIRType>(type.member_types[index]);
12795 	if (mbr_type.columns != mbr_type.vecsize)
12796 		SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
12797 
12798 	return true;
12799 }
12800 
12801 // Checks if we need to remap physical type IDs when declaring the type in a buffer.
12802 bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
12803 {
12804 	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
12805 }
12806 
12807 // Checks whether the member is in packed data type, that might need to be unpacked.
12808 bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
12809 {
12810 	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
12811 }
12812 
12813 // Wraps the expression string in a function call that converts the
12814 // row_major matrix result of the expression to a column_major matrix.
12815 // Base implementation uses the standard library transpose() function.
12816 // Subclasses may override to use a different function.
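// For example, a row-major mat4 member m (hypothetical name) is read back as roughly "transpose(m)"
// (or "spvTranspose(m)" on pre-1.20 targets), and reading a single column of such a matrix is
// unrolled into a vector constructor below.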
12817 string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
12818                                               bool /*is_packed*/)
12819 {
12820 	strip_enclosed_expression(exp_str);
12821 	if (!is_matrix(exp_type))
12822 	{
12823 		auto column_index = exp_str.find_last_of('[');
12824 		if (column_index == string::npos)
12825 			return exp_str;
12826 
12827 		auto column_expr = exp_str.substr(column_index);
12828 		exp_str.resize(column_index);
12829 
12830 		auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
12831 
12832 		// Loading a column from a row-major matrix. Unroll the load.
12833 		for (uint32_t c = 0; c < exp_type.vecsize; c++)
12834 		{
12835 			transposed_expr += join(exp_str, '[', c, ']', column_expr);
12836 			if (c + 1 < exp_type.vecsize)
12837 				transposed_expr += ", ";
12838 		}
12839 
12840 		transposed_expr += ")";
12841 		return transposed_expr;
12842 	}
12843 	else if (options.version < 120)
12844 	{
12845 		// GLSL 110, ES 100 do not have transpose(), so emulate it.  Note that
12846 		// these GLSL versions do not support non-square matrices.
12847 		if (exp_type.vecsize == 2 && exp_type.columns == 2)
12848 		{
12849 			if (!requires_transpose_2x2)
12850 			{
12851 				requires_transpose_2x2 = true;
12852 				force_recompile();
12853 			}
12854 		}
12855 		else if (exp_type.vecsize == 3 && exp_type.columns == 3)
12856 		{
12857 			if (!requires_transpose_3x3)
12858 			{
12859 				requires_transpose_3x3 = true;
12860 				force_recompile();
12861 			}
12862 		}
12863 		else if (exp_type.vecsize == 4 && exp_type.columns == 4)
12864 		{
12865 			if (!requires_transpose_4x4)
12866 			{
12867 				requires_transpose_4x4 = true;
12868 				force_recompile();
12869 			}
12870 		}
12871 		else
12872 			SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
12873 		return join("spvTranspose(", exp_str, ")");
12874 	}
12875 	else
12876 		return join("transpose(", exp_str, ")");
12877 }
12878 
12879 string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
12880 {
12881 	string type_name = type_to_glsl(type, id);
12882 	remap_variable_type_name(type, name, type_name);
12883 	return join(type_name, " ", name, type_to_array_glsl(type));
12884 }
12885 
12886 bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
12887 {
12888 	return var.storage == storage;
12889 }
12890 
12891 // Emit a structure member. Subclasses may override to modify output,
12892 // or to dynamically add a padding member if needed.
12893 void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
12894                                       const string &qualifier, uint32_t)
12895 {
12896 	auto &membertype = get<SPIRType>(member_type_id);
12897 
12898 	Bitset memberflags;
12899 	auto &memb = ir.meta[type.self].members;
12900 	if (index < memb.size())
12901 		memberflags = memb[index].decoration_flags;
12902 
12903 	string qualifiers;
12904 	bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
12905 	                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
12906 
12907 	if (is_block)
12908 		qualifiers = to_interpolation_qualifiers(memberflags);
12909 
12910 	statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
12911 	          variable_decl(membertype, to_member_name(type, index)), ";");
12912 }
12913 
12914 void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
12915 {
12916 }
12917 
12918 string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
12919 {
12920 	// GL_EXT_buffer_reference variables can be marked as restrict.
12921 	if (flags.get(DecorationRestrictPointerEXT))
12922 		return "restrict ";
12923 
12924 	string qual;
12925 
12926 	if (type_is_floating_point(type) && flags.get(DecorationNoContraction) && backend.support_precise_qualifier)
12927 		qual = "precise ";
12928 
12929 	// Structs do not have precision qualifiers, and neither do doubles (desktop only anyway, so no mediump/highp).
12930 	bool type_supports_precision =
12931 			type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
12932 			type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
12933 			type.basetype == SPIRType::Sampler;
12934 
12935 	if (!type_supports_precision)
12936 		return qual;
12937 
12938 	if (options.es)
12939 	{
12940 		auto &execution = get_entry_point();
12941 
12942 		if (flags.get(DecorationRelaxedPrecision))
12943 		{
12944 			bool implied_fmediump = type.basetype == SPIRType::Float &&
12945 			                        options.fragment.default_float_precision == Options::Mediump &&
12946 			                        execution.model == ExecutionModelFragment;
12947 
12948 			bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
12949 			                        options.fragment.default_int_precision == Options::Mediump &&
12950 			                        execution.model == ExecutionModelFragment;
12951 
12952 			qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
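			// E.g. a RelaxedPrecision float in a fragment shader whose default float precision is
			// already mediump gets no explicit qualifier here; otherwise "mediump " is added.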
12953 		}
12954 		else
12955 		{
12956 			bool implied_fhighp =
12957 			    type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
12958 			                                          execution.model == ExecutionModelFragment) ||
12959 			                                         (execution.model != ExecutionModelFragment));
12960 
12961 			bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
12962 			                      ((options.fragment.default_int_precision == Options::Highp &&
12963 			                        execution.model == ExecutionModelFragment) ||
12964 			                       (execution.model != ExecutionModelFragment));
12965 
12966 			qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
12967 		}
12968 	}
12969 	else if (backend.allow_precision_qualifiers)
12970 	{
12971 		// Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
12972 		// The default is highp however, so only emit mediump in the rare case that a shader has these.
12973 		if (flags.get(DecorationRelaxedPrecision))
12974 			qual += "mediump ";
12975 	}
12976 
12977 	return qual;
12978 }
12979 
12980 string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
12981 {
12982 	auto &type = expression_type(id);
12983 	bool use_precision_qualifiers = backend.allow_precision_qualifiers;
12984 	if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
12985 	{
12986 		// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
12987 		auto &result_type = get<SPIRType>(type.image.type);
12988 		if (result_type.width < 32)
12989 			return "mediump ";
12990 	}
12991 	return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
12992 }
12993 
12994 void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
12995 {
12996 	// Works around weird behavior in glslangValidator where
12997 	// a patch out block is translated to just block members getting the decoration.
12998 	// To make glslang not complain when we compile again, we have to transform this back to a case where
12999 	// the variable itself has Patch decoration, and not members.
13000 	auto &type = get<SPIRType>(var.basetype);
13001 	if (has_decoration(type.self, DecorationBlock))
13002 	{
13003 		uint32_t member_count = uint32_t(type.member_types.size());
13004 		for (uint32_t i = 0; i < member_count; i++)
13005 		{
13006 			if (has_member_decoration(type.self, i, DecorationPatch))
13007 			{
13008 				set_decoration(var.self, DecorationPatch);
13009 				break;
13010 			}
13011 		}
13012 
13013 		if (has_decoration(var.self, DecorationPatch))
13014 			for (uint32_t i = 0; i < member_count; i++)
13015 				unset_member_decoration(type.self, i, DecorationPatch);
13016 	}
13017 }
13018 
13019 string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
13020 {
13021 	auto &flags = ir.meta[id].decoration.decoration_flags;
13022 	string res;
13023 
13024 	auto *var = maybe_get<SPIRVariable>(id);
13025 
13026 	if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
13027 		res += "shared ";
13028 
13029 	res += to_interpolation_qualifiers(flags);
13030 	if (var)
13031 		res += to_storage_qualifiers_glsl(*var);
13032 
13033 	auto &type = expression_type(id);
13034 	if (type.image.dim != DimSubpassData && type.image.sampled == 2)
13035 	{
13036 		if (flags.get(DecorationCoherent))
13037 			res += "coherent ";
13038 		if (flags.get(DecorationRestrict))
13039 			res += "restrict ";
13040 
13041 		if (flags.get(DecorationNonWritable))
13042 			res += "readonly ";
13043 
13044 		bool formatted_load = type.image.format == ImageFormatUnknown;
13045 		if (flags.get(DecorationNonReadable))
13046 		{
13047 			res += "writeonly ";
13048 			formatted_load = false;
13049 		}
13050 
13051 		if (formatted_load)
13052 		{
13053 			if (!options.es)
13054 				require_extension_internal("GL_EXT_shader_image_load_formatted");
13055 			else
13056 				SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
13057 		}
13058 	}
13059 
13060 	res += to_precision_qualifiers_glsl(id);
13061 
13062 	return res;
13063 }
13064 
13065 string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
13066 {
13067 	// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
13068 	auto &type = expression_type(arg.id);
13069 	const char *direction = "";
13070 
13071 	if (type.pointer)
13072 	{
13073 		if (arg.write_count && arg.read_count)
13074 			direction = "inout ";
13075 		else if (arg.write_count)
13076 			direction = "out ";
13077 	}
13078 
13079 	return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
13080 }
13081 
13082 string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
13083 {
13084 	return to_expression(var.initializer);
13085 }
13086 
13087 string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
13088 {
13089 #ifndef NDEBUG
13090 	auto &type = get<SPIRType>(type_id);
13091 	assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
13092 	       type.storage == StorageClassGeneric);
13093 #endif
13094 	uint32_t id = ir.increase_bound_by(1);
13095 	ir.make_constant_null(id, type_id, false);
13096 	return constant_expression(get<SPIRConstant>(id));
13097 }
13098 
13099 bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
13100 {
13101 	if (type.pointer)
13102 		return false;
13103 
13104 	if (!type.array.empty() && options.flatten_multidimensional_arrays)
13105 		return false;
13106 
13107 	for (auto &literal : type.array_size_literal)
13108 		if (!literal)
13109 			return false;
13110 
13111 	for (auto &memb : type.member_types)
13112 		if (!type_can_zero_initialize(get<SPIRType>(memb)))
13113 			return false;
13114 
13115 	return true;
13116 }
13117 
13118 string CompilerGLSL::variable_decl(const SPIRVariable &variable)
13119 {
13120 	// Ignore the pointer type since GLSL doesn't have pointers.
13121 	auto &type = get_variable_data_type(variable);
13122 
13123 	if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer)
13124 		SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
13125 
13126 	auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
13127 
13128 	if (variable.loop_variable && variable.static_expression)
13129 	{
13130 		uint32_t expr = variable.static_expression;
13131 		if (ir.ids[expr].get_type() != TypeUndef)
13132 			res += join(" = ", to_expression(variable.static_expression));
13133 		else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
13134 			res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
13135 	}
13136 	else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
13137 	{
13138 		uint32_t expr = variable.initializer;
13139 		if (ir.ids[expr].get_type() != TypeUndef)
13140 			res += join(" = ", to_initializer_expression(variable));
13141 		else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
13142 			res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
13143 	}
13144 
13145 	return res;
13146 }
13147 
13148 const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
13149 {
13150 	auto &flags = ir.meta[variable.self].decoration.decoration_flags;
13151 	if (flags.get(DecorationRelaxedPrecision))
13152 		return "mediump ";
13153 	else
13154 		return "highp ";
13155 }
13156 
13157 string CompilerGLSL::pls_decl(const PlsRemap &var)
13158 {
13159 	auto &variable = get<SPIRVariable>(var.id);
13160 
13161 	SPIRType type;
13162 	type.vecsize = pls_format_to_components(var.format);
13163 	type.basetype = pls_format_to_basetype(var.format);
13164 
13165 	return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
13166 	            to_name(variable.self));
13167 }
13168 
13169 uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
13170 {
13171 	return to_array_size_literal(type, uint32_t(type.array.size() - 1));
13172 }
13173 
13174 uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
13175 {
13176 	assert(type.array.size() == type.array_size_literal.size());
13177 
13178 	if (type.array_size_literal[index])
13179 	{
13180 		return type.array[index];
13181 	}
13182 	else
13183 	{
13184 		// Use the default spec constant value.
13185 		// This is the best we can do.
13186 		return evaluate_constant_u32(type.array[index]);
13187 	}
13188 }
13189 
13190 string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
13191 {
13192 	assert(type.array.size() == type.array_size_literal.size());
13193 
13194 	auto &size = type.array[index];
13195 	if (!type.array_size_literal[index])
13196 		return to_expression(size);
13197 	else if (size)
13198 		return convert_to_string(size);
13199 	else if (!backend.unsized_array_supported)
13200 	{
13201 		// For runtime-sized arrays, we can work around
13202 		// lack of standard support for this by simply having
13203 		// a single element array.
13204 		//
13205 		// Runtime length arrays must always be the last element
13206 		// in an interface block.
13207 		return "1";
13208 	}
13209 	else
13210 		return "";
13211 }
13212 
13213 string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
13214 {
13215 	if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
13216 	{
13217 		// We are using a wrapped pointer type, and we should not emit any array declarations here.
13218 		return "";
13219 	}
13220 
13221 	if (type.array.empty())
13222 		return "";
13223 
13224 	if (options.flatten_multidimensional_arrays)
13225 	{
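		// Collapse every dimension into one size expression, so e.g. what would be declared as
		// "float v[4][3]" comes out as roughly "float v[4 * 3]" instead.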
13226 		string res;
13227 		res += "[";
13228 		for (auto i = uint32_t(type.array.size()); i; i--)
13229 		{
13230 			res += enclose_expression(to_array_size(type, i - 1));
13231 			if (i > 1)
13232 				res += " * ";
13233 		}
13234 		res += "]";
13235 		return res;
13236 	}
13237 	else
13238 	{
13239 		if (type.array.size() > 1)
13240 		{
13241 			if (!options.es && options.version < 430)
13242 				require_extension_internal("GL_ARB_arrays_of_arrays");
13243 			else if (options.es && options.version < 310)
13244 				SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
13245 				                  "Try using --flatten-multidimensional-arrays or set "
13246 				                  "options.flatten_multidimensional_arrays to true.");
13247 		}
13248 
13249 		string res;
13250 		for (auto i = uint32_t(type.array.size()); i; i--)
13251 		{
13252 			res += "[";
13253 			res += to_array_size(type, i - 1);
13254 			res += "]";
13255 		}
13256 		return res;
13257 	}
13258 }
13259 
13260 string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
13261 {
13262 	auto &imagetype = get<SPIRType>(type.image.type);
13263 	string res;
13264 
13265 	switch (imagetype.basetype)
13266 	{
13267 	case SPIRType::Int:
13268 	case SPIRType::Short:
13269 	case SPIRType::SByte:
13270 		res = "i";
13271 		break;
13272 	case SPIRType::UInt:
13273 	case SPIRType::UShort:
13274 	case SPIRType::UByte:
13275 		res = "u";
13276 		break;
13277 	default:
13278 		break;
13279 	}
13280 
13281 	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
13282 	// We cannot express a true half texture type in GLSL, nor can we for short integer formats, for that matter.
13283 
13284 	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
13285 		return res + "subpassInput" + (type.image.ms ? "MS" : "");
13286 	else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
13287 	         subpass_input_is_framebuffer_fetch(id))
13288 	{
13289 		SPIRType sampled_type = get<SPIRType>(type.image.type);
13290 		sampled_type.vecsize = 4;
13291 		return type_to_glsl(sampled_type);
13292 	}
13293 
13294 	// If we're emulating subpassInput with samplers, force sampler2D
13295 	// so we don't have to specify format.
13296 	if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
13297 	{
13298 		// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
13299 		if (type.image.dim == DimBuffer && type.image.sampled == 1)
13300 			res += "sampler";
13301 		else
13302 			res += type.image.sampled == 2 ? "image" : "texture";
13303 	}
13304 	else
13305 		res += "sampler";
13306 
13307 	switch (type.image.dim)
13308 	{
13309 	case Dim1D:
13310 		res += "1D";
13311 		break;
13312 	case Dim2D:
13313 		res += "2D";
13314 		break;
13315 	case Dim3D:
13316 		res += "3D";
13317 		break;
13318 	case DimCube:
13319 		res += "Cube";
13320 		break;
13321 	case DimRect:
13322 		if (options.es)
13323 			SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
13324 
13325 		if (is_legacy_desktop())
13326 			require_extension_internal("GL_ARB_texture_rectangle");
13327 
13328 		res += "2DRect";
13329 		break;
13330 
13331 	case DimBuffer:
13332 		if (options.es && options.version < 320)
13333 			require_extension_internal("GL_OES_texture_buffer");
13334 		else if (!options.es && options.version < 300)
13335 			require_extension_internal("GL_EXT_texture_buffer_object");
13336 		res += "Buffer";
13337 		break;
13338 
13339 	case DimSubpassData:
13340 		res += "2D";
13341 		break;
13342 	default:
13343 		SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
13344 	}
13345 
13346 	if (type.image.ms)
13347 		res += "MS";
13348 	if (type.image.arrayed)
13349 	{
13350 		if (is_legacy_desktop())
13351 			require_extension_internal("GL_EXT_texture_array");
13352 		res += "Array";
13353 	}
13354 
13355 	// "Shadow" state in GLSL only exists for samplers and combined image samplers.
13356 	if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
13357 	    image_is_comparison(type, id))
13358 	{
13359 		res += "Shadow";
13360 	}
13361 
13362 	return res;
13363 }
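
// A few assumed examples of the names built above: a storage image with int components in Dim2D
// becomes "iimage2D", a depth-compare sampled 2D array becomes "sampler2DArrayShadow", and with
// Vulkan semantics a multisampled subpass input becomes "subpassInputMS".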
13364 
13365 string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
13366 {
13367 	if (backend.use_array_constructor && type.array.size() > 1)
13368 	{
13369 		if (options.flatten_multidimensional_arrays)
13370 			SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional arrays, "
13371 			                  "e.g. float[][]().");
13372 		else if (!options.es && options.version < 430)
13373 			require_extension_internal("GL_ARB_arrays_of_arrays");
13374 		else if (options.es && options.version < 310)
13375 			SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
13376 	}
13377 
13378 	auto e = type_to_glsl(type);
13379 	if (backend.use_array_constructor)
13380 	{
13381 		for (uint32_t i = 0; i < type.array.size(); i++)
13382 			e += "[]";
13383 	}
13384 	return e;
13385 }
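
// Sketch: for an array type such as float[3], the constructor name becomes "float[]", so a
// composite construct is emitted as float[](a, b, c); multidimensional cases append one "[]"
// per dimension when the backend uses array constructors.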
13386 
13387 // The optional id parameter indicates the object whose type we are trying
13388 // to find the description for. Most type descriptions do not depend on a
13389 // specific object's use of that type.
13390 string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
13391 {
13392 	if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
13393 	{
13394 		// Need to create a magic type name which compacts the entire type information.
13395 		string name = type_to_glsl(get_pointee_type(type));
13396 		for (size_t i = 0; i < type.array.size(); i++)
13397 		{
13398 			if (type.array_size_literal[i])
13399 				name += join(type.array[i], "_");
13400 			else
13401 				name += join("id", type.array[i], "_");
13402 		}
13403 		name += "Pointer";
13404 		return name;
13405 	}
13406 
13407 	switch (type.basetype)
13408 	{
13409 	case SPIRType::Struct:
13410 		// Need OpName lookup here to get a "sensible" name for a struct.
13411 		if (backend.explicit_struct_type)
13412 			return join("struct ", to_name(type.self));
13413 		else
13414 			return to_name(type.self);
13415 
13416 	case SPIRType::Image:
13417 	case SPIRType::SampledImage:
13418 		return image_type_glsl(type, id);
13419 
13420 	case SPIRType::Sampler:
13421 		// The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
13422 		// this distinction into the type system.
13423 		return comparison_ids.count(id) ? "samplerShadow" : "sampler";
13424 
13425 	case SPIRType::AccelerationStructure:
13426 		return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
13427 
13428 	case SPIRType::RayQuery:
13429 		return "rayQueryEXT";
13430 
13431 	case SPIRType::Void:
13432 		return "void";
13433 
13434 	default:
13435 		break;
13436 	}
13437 
13438 	if (type.basetype == SPIRType::UInt && is_legacy())
13439 		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
13440 
13441 	if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
13442 	{
13443 		switch (type.basetype)
13444 		{
13445 		case SPIRType::Boolean:
13446 			return "bool";
13447 		case SPIRType::SByte:
13448 			return backend.basic_int8_type;
13449 		case SPIRType::UByte:
13450 			return backend.basic_uint8_type;
13451 		case SPIRType::Short:
13452 			return backend.basic_int16_type;
13453 		case SPIRType::UShort:
13454 			return backend.basic_uint16_type;
13455 		case SPIRType::Int:
13456 			return backend.basic_int_type;
13457 		case SPIRType::UInt:
13458 			return backend.basic_uint_type;
13459 		case SPIRType::AtomicCounter:
13460 			return "atomic_uint";
13461 		case SPIRType::Half:
13462 			return "float16_t";
13463 		case SPIRType::Float:
13464 			return "float";
13465 		case SPIRType::Double:
13466 			return "double";
13467 		case SPIRType::Int64:
13468 			return "int64_t";
13469 		case SPIRType::UInt64:
13470 			return "uint64_t";
13471 		default:
13472 			return "???";
13473 		}
13474 	}
13475 	else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
13476 	{
13477 		switch (type.basetype)
13478 		{
13479 		case SPIRType::Boolean:
13480 			return join("bvec", type.vecsize);
13481 		case SPIRType::SByte:
13482 			return join("i8vec", type.vecsize);
13483 		case SPIRType::UByte:
13484 			return join("u8vec", type.vecsize);
13485 		case SPIRType::Short:
13486 			return join("i16vec", type.vecsize);
13487 		case SPIRType::UShort:
13488 			return join("u16vec", type.vecsize);
13489 		case SPIRType::Int:
13490 			return join("ivec", type.vecsize);
13491 		case SPIRType::UInt:
13492 			return join("uvec", type.vecsize);
13493 		case SPIRType::Half:
13494 			return join("f16vec", type.vecsize);
13495 		case SPIRType::Float:
13496 			return join("vec", type.vecsize);
13497 		case SPIRType::Double:
13498 			return join("dvec", type.vecsize);
13499 		case SPIRType::Int64:
13500 			return join("i64vec", type.vecsize);
13501 		case SPIRType::UInt64:
13502 			return join("u64vec", type.vecsize);
13503 		default:
13504 			return "???";
13505 		}
13506 	}
13507 	else if (type.vecsize == type.columns) // Simple Matrix builtin
13508 	{
13509 		switch (type.basetype)
13510 		{
13511 		case SPIRType::Boolean:
13512 			return join("bmat", type.vecsize);
13513 		case SPIRType::Int:
13514 			return join("imat", type.vecsize);
13515 		case SPIRType::UInt:
13516 			return join("umat", type.vecsize);
13517 		case SPIRType::Half:
13518 			return join("f16mat", type.vecsize);
13519 		case SPIRType::Float:
13520 			return join("mat", type.vecsize);
13521 		case SPIRType::Double:
13522 			return join("dmat", type.vecsize);
13523 		// Matrix types not supported for int64/uint64.
13524 		default:
13525 			return "???";
13526 		}
13527 	}
13528 	else
13529 	{
13530 		switch (type.basetype)
13531 		{
13532 		case SPIRType::Boolean:
13533 			return join("bmat", type.columns, "x", type.vecsize);
13534 		case SPIRType::Int:
13535 			return join("imat", type.columns, "x", type.vecsize);
13536 		case SPIRType::UInt:
13537 			return join("umat", type.columns, "x", type.vecsize);
13538 		case SPIRType::Half:
13539 			return join("f16mat", type.columns, "x", type.vecsize);
13540 		case SPIRType::Float:
13541 			return join("mat", type.columns, "x", type.vecsize);
13542 		case SPIRType::Double:
13543 			return join("dmat", type.columns, "x", type.vecsize);
13544 		// Matrix types not supported for int64/uint64.
13545 		default:
13546 			return "???";
13547 		}
13548 	}
13549 }
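
// Assumed examples of the resulting names: a 4-component float vector maps to "vec4", a
// 3-component signed int vector to "ivec3", a square 4x4 float matrix to "mat4", a non-square
// 3-column/4-row float matrix to "mat3x4", and a 2-component half vector to "f16vec2".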
13550 
13551 void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
13552                                 const unordered_set<string> &variables_secondary, string &name)
13553 {
13554 	if (name.empty())
13555 		return;
13556 
13557 	ParsedIR::sanitize_underscores(name);
13558 	if (ParsedIR::is_globally_reserved_identifier(name, true))
13559 	{
13560 		name.clear();
13561 		return;
13562 	}
13563 
13564 	update_name_cache(variables_primary, variables_secondary, name);
13565 }
13566 
13567 void CompilerGLSL::add_local_variable_name(uint32_t id)
13568 {
13569 	add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
13570 }
13571 
13572 void CompilerGLSL::add_resource_name(uint32_t id)
13573 {
13574 	add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
13575 }
13576 
13577 void CompilerGLSL::add_header_line(const std::string &line)
13578 {
13579 	header_lines.push_back(line);
13580 }
13581 
13582 bool CompilerGLSL::has_extension(const std::string &ext) const
13583 {
13584 	auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
13585 	return itr != end(forced_extensions);
13586 }
13587 
13588 void CompilerGLSL::require_extension(const std::string &ext)
13589 {
13590 	if (!has_extension(ext))
13591 		forced_extensions.push_back(ext);
13592 }
13593 
13594 void CompilerGLSL::require_extension_internal(const string &ext)
13595 {
13596 	if (backend.supports_extensions && !has_extension(ext))
13597 	{
13598 		forced_extensions.push_back(ext);
13599 		force_recompile();
13600 	}
13601 }
13602 
13603 void CompilerGLSL::flatten_buffer_block(VariableID id)
13604 {
13605 	auto &var = get<SPIRVariable>(id);
13606 	auto &type = get<SPIRType>(var.basetype);
13607 	auto name = to_name(type.self, false);
13608 	auto &flags = ir.meta[type.self].decoration.decoration_flags;
13609 
13610 	if (!type.array.empty())
13611 		SPIRV_CROSS_THROW(name + " is an array of UBOs.");
13612 	if (type.basetype != SPIRType::Struct)
13613 		SPIRV_CROSS_THROW(name + " is not a struct.");
13614 	if (!flags.get(DecorationBlock))
13615 		SPIRV_CROSS_THROW(name + " is not a block.");
13616 	if (type.member_types.empty())
13617 		SPIRV_CROSS_THROW(name + " is an empty struct.");
13618 
13619 	flattened_buffer_blocks.insert(id);
13620 }
13621 
13622 bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
13623 {
13624 	return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
13625 }
13626 
13627 bool CompilerGLSL::check_atomic_image(uint32_t id)
13628 {
13629 	auto &type = expression_type(id);
13630 	if (type.storage == StorageClassImage)
13631 	{
13632 		if (options.es && options.version < 320)
13633 			require_extension_internal("GL_OES_shader_image_atomic");
13634 
13635 		auto *var = maybe_get_backing_variable(id);
13636 		if (var)
13637 		{
13638 			auto &flags = ir.meta[var->self].decoration.decoration_flags;
13639 			if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
13640 			{
13641 				flags.clear(DecorationNonWritable);
13642 				flags.clear(DecorationNonReadable);
13643 				force_recompile();
13644 			}
13645 		}
13646 		return true;
13647 	}
13648 	else
13649 		return false;
13650 }
13651 
13652 void CompilerGLSL::add_function_overload(const SPIRFunction &func)
13653 {
13654 	Hasher hasher;
13655 	for (auto &arg : func.arguments)
13656 	{
13657 		// Parameters can vary with pointer type or not,
13658 		// but that will not change the signature in GLSL/HLSL,
13659 		// so strip the pointer type before hashing.
13660 		uint32_t type_id = get_pointee_type_id(arg.type);
13661 		auto &type = get<SPIRType>(type_id);
13662 
13663 		if (!combined_image_samplers.empty())
13664 		{
13665 			// If we have combined image samplers, we cannot really trust the image and sampler arguments
13666 			// we pass down to callees, because they may be shuffled around.
13667 			// Ignore these arguments, to make sure that functions need to differ in some other way
13668 			// to be considered different overloads.
13669 			if (type.basetype == SPIRType::SampledImage ||
13670 			    (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
13671 			{
13672 				continue;
13673 			}
13674 		}
13675 
13676 		hasher.u32(type_id);
13677 	}
13678 	uint64_t types_hash = hasher.get();
13679 
13680 	auto function_name = to_name(func.self);
13681 	auto itr = function_overloads.find(function_name);
13682 	if (itr != end(function_overloads))
13683 	{
13684 		// There exists a function with this name already.
13685 		auto &overloads = itr->second;
13686 		if (overloads.count(types_hash) != 0)
13687 		{
13688 			// Overload conflict, assign a new name.
13689 			add_resource_name(func.self);
13690 			function_overloads[to_name(func.self)].insert(types_hash);
13691 		}
13692 		else
13693 		{
13694 			// Can reuse the name.
13695 			overloads.insert(types_hash);
13696 		}
13697 	}
13698 	else
13699 	{
13700 		// First time we see this function name.
13701 		add_resource_name(func.self);
13702 		function_overloads[to_name(func.self)].insert(types_hash);
13703 	}
13704 }
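
// For illustration: if two SPIR-V functions share the OpName "foo" and also hash to the same
// argument-type signature, the second one is routed through add_resource_name(), which picks a
// fresh unique name, so the emitted GLSL never declares two conflicting definitions of "foo".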
13705 
13706 void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
13707 {
13708 	if (func.self != ir.default_entry_point)
13709 		add_function_overload(func);
13710 
13711 	// Avoid shadow declarations.
13712 	local_variable_names = resource_names;
13713 
13714 	string decl;
13715 
13716 	auto &type = get<SPIRType>(func.return_type);
13717 	decl += flags_to_qualifiers_glsl(type, return_flags);
13718 	decl += type_to_glsl(type);
13719 	decl += type_to_array_glsl(type);
13720 	decl += " ";
13721 
13722 	if (func.self == ir.default_entry_point)
13723 	{
13724 		// If we need complex fallback in GLSL, we just wrap main() in a function
13725 		// and interlock the entire shader ...
13726 		if (interlocked_is_complex)
13727 			decl += "spvMainInterlockedBody";
13728 		else
13729 			decl += "main";
13730 
13731 		processing_entry_point = true;
13732 	}
13733 	else
13734 		decl += to_name(func.self);
13735 
13736 	decl += "(";
13737 	SmallVector<string> arglist;
13738 	for (auto &arg : func.arguments)
13739 	{
13740 		// Do not pass in separate images or samplers if we're remapping
13741 		// to combined image samplers.
13742 		if (skip_argument(arg.id))
13743 			continue;
13744 
13745 		// Might change the variable name if it already exists in this function.
13746 		// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
13747 		// to use the same name for variables.
13748 		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
13749 		add_local_variable_name(arg.id);
13750 
13751 		arglist.push_back(argument_decl(arg));
13752 
13753 		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
13754 		auto *var = maybe_get<SPIRVariable>(arg.id);
13755 		if (var)
13756 			var->parameter = &arg;
13757 	}
13758 
13759 	for (auto &arg : func.shadow_arguments)
13760 	{
13761 		// Might change the variable name if it already exists in this function.
13762 		// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
13763 		// to use the same name for variables.
13764 		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
13765 		add_local_variable_name(arg.id);
13766 
13767 		arglist.push_back(argument_decl(arg));
13768 
13769 		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
13770 		auto *var = maybe_get<SPIRVariable>(arg.id);
13771 		if (var)
13772 			var->parameter = &arg;
13773 	}
13774 
13775 	decl += merge(arglist);
13776 	decl += ")";
13777 	statement(decl);
13778 }
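
// As a hypothetical example (identifiers invented for illustration), a helper function could be
// emitted as:
//   void my_func(inout vec4 color, int count)
// while the entry point itself is declared as "void main()" (or "spvMainInterlockedBody" when the
// interlock fallback wraps main).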
13779 
13780 void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
13781 {
13782 	// Avoid potential cycles.
13783 	if (func.active)
13784 		return;
13785 	func.active = true;
13786 
13787 	// If we depend on a function, emit that function before we emit our own function.
13788 	for (auto block : func.blocks)
13789 	{
13790 		auto &b = get<SPIRBlock>(block);
13791 		for (auto &i : b.ops)
13792 		{
13793 			auto ops = stream(i);
13794 			auto op = static_cast<Op>(i.op);
13795 
13796 			if (op == OpFunctionCall)
13797 			{
13798 				// Recursively emit functions which are called.
13799 				uint32_t id = ops[2];
13800 				emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
13801 			}
13802 		}
13803 	}
13804 
13805 	if (func.entry_line.file_id != 0)
13806 		emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
13807 	emit_function_prototype(func, return_flags);
13808 	begin_scope();
13809 
13810 	if (func.self == ir.default_entry_point)
13811 		emit_entry_point_declarations();
13812 
13813 	current_function = &func;
13814 	auto &entry_block = get<SPIRBlock>(func.entry_block);
13815 
13816 	sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
13817 	for (auto &array : func.constant_arrays_needed_on_stack)
13818 	{
13819 		auto &c = get<SPIRConstant>(array);
13820 		auto &type = get<SPIRType>(c.constant_type);
13821 		statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
13822 	}
13823 
13824 	for (auto &v : func.local_variables)
13825 	{
13826 		auto &var = get<SPIRVariable>(v);
13827 		var.deferred_declaration = false;
13828 
13829 		if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup))
13830 		{
13831 			// Special variable type which cannot have initializer,
13832 			// need to be declared as standalone variables.
13833 			// Comes from MSL which can push global variables as local variables in main function.
13834 			add_local_variable_name(var.self);
13835 			statement(variable_decl(var), ";");
13836 			var.deferred_declaration = false;
13837 		}
13838 		else if (var.storage == StorageClassPrivate)
13839 		{
13840 			// These variables will not have had their CFG usage analyzed, so move it to the entry block.
13841 			// Comes from MSL which can push global variables as local variables in main function.
13842 			// We could just declare them right now, but we would miss out on an important initialization case which is
13843 			// LUT declaration in MSL.
13844 			// If we don't declare the variable when it is assigned we're forced to go through a helper function
13845 			// which copies elements one by one.
13846 			add_local_variable_name(var.self);
13847 
13848 			if (var.initializer)
13849 			{
13850 				statement(variable_decl(var), ";");
13851 				var.deferred_declaration = false;
13852 			}
13853 			else
13854 			{
13855 				auto &dominated = entry_block.dominated_variables;
13856 				if (find(begin(dominated), end(dominated), var.self) == end(dominated))
13857 					entry_block.dominated_variables.push_back(var.self);
13858 				var.deferred_declaration = true;
13859 			}
13860 		}
13861 		else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
13862 		{
13863 			// No need to declare this variable, it has a static expression.
13864 			var.deferred_declaration = false;
13865 		}
13866 		else if (expression_is_lvalue(v))
13867 		{
13868 			add_local_variable_name(var.self);
13869 
13870 			// Loop variables should never be declared early, they are explicitly emitted in a loop.
13871 			if (var.initializer && !var.loop_variable)
13872 				statement(variable_decl_function_local(var), ";");
13873 			else
13874 			{
13875 				// Don't declare variable until first use to declutter the GLSL output quite a lot.
13876 				// If we don't touch the variable before first branch,
13877 				// declare it then since we need variable declaration to be in top scope.
13878 				var.deferred_declaration = true;
13879 			}
13880 		}
13881 		else
13882 		{
13883 			// HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
13884 			// For these types (non-lvalue), we enforce forwarding through a shadowed variable.
13885 			// This means that when we OpStore to these variables, we just write in the expression ID directly.
13886 			// This breaks any kind of branching, since the variable must be statically assigned.
13887 			// Branching on samplers and images would be pretty much impossible to fake in GLSL.
13888 			var.statically_assigned = true;
13889 		}
13890 
13891 		var.loop_variable_enable = false;
13892 
13893 		// Loop variables are never declared outside their for-loop, so block any implicit declaration.
13894 		if (var.loop_variable)
13895 			var.deferred_declaration = false;
13896 	}
13897 
13898 	// Enforce declaration order for regression testing purposes.
13899 	for (auto &block_id : func.blocks)
13900 	{
13901 		auto &block = get<SPIRBlock>(block_id);
13902 		sort(begin(block.dominated_variables), end(block.dominated_variables));
13903 	}
13904 
13905 	for (auto &line : current_function->fixup_hooks_in)
13906 		line();
13907 
13908 	emit_block_chain(entry_block);
13909 
13910 	end_scope();
13911 	processing_entry_point = false;
13912 	statement("");
13913 
13914 	// Make sure deferred declaration state for local variables is cleared when we are done with function.
13915 	// We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
13916 	for (auto &v : func.local_variables)
13917 	{
13918 		auto &var = get<SPIRVariable>(v);
13919 		var.deferred_declaration = false;
13920 	}
13921 }
13922 
13923 void CompilerGLSL::emit_fixup()
13924 {
13925 	if (is_vertex_like_shader())
13926 	{
13927 		if (options.vertex.fixup_clipspace)
13928 		{
13929 			const char *suffix = backend.float_literal_suffix ? "f" : "";
13930 			statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
13931 		}
13932 
13933 		if (options.vertex.flip_vert_y)
13934 			statement("gl_Position.y = -gl_Position.y;");
13935 	}
13936 }
13937 
13938 void CompilerGLSL::flush_phi(BlockID from, BlockID to)
13939 {
13940 	auto &child = get<SPIRBlock>(to);
13941 	if (child.ignore_phi_from_block == from)
13942 		return;
13943 
13944 	unordered_set<uint32_t> temporary_phi_variables;
13945 
13946 	for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
13947 	{
13948 		auto &phi = *itr;
13949 
13950 		if (phi.parent == from)
13951 		{
13952 			auto &var = get<SPIRVariable>(phi.function_variable);
13953 
13954 			// A Phi variable might be a loop variable, so flush to static expression.
13955 			if (var.loop_variable && !var.loop_variable_enable)
13956 				var.static_expression = phi.local_variable;
13957 			else
13958 			{
13959 				flush_variable_declaration(phi.function_variable);
13960 
13961 				// Check if we are going to write to a Phi variable that another statement will read from
13962 				// as part of another Phi node in our target block.
13963 				// For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
13964 				// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
13965 				bool need_saved_temporary =
13966 				    find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
13967 					    return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
13968 				    }) != end(child.phi_variables);
13969 
13970 				if (need_saved_temporary)
13971 				{
13972 					// Need to make sure we declare the phi variable with a copy at the right scope.
13973 					// We cannot safely declare a temporary here since we might be inside a continue block.
13974 					if (!var.allocate_temporary_copy)
13975 					{
13976 						var.allocate_temporary_copy = true;
13977 						force_recompile();
13978 					}
13979 					statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
13980 					temporary_phi_variables.insert(phi.function_variable);
13981 				}
13982 
13983 				// This might be called in a continue block, so make sure we
13984 				// use this to emit ESSL 1.0-compliant increments/decrements.
13985 				auto lhs = to_expression(phi.function_variable);
13986 
13987 				string rhs;
13988 				if (temporary_phi_variables.count(phi.local_variable))
13989 					rhs = join("_", phi.local_variable, "_copy");
13990 				else
13991 					rhs = to_pointer_expression(phi.local_variable);
13992 
13993 				if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
13994 					statement(lhs, " = ", rhs, ";");
13995 			}
13996 
13997 			register_write(phi.function_variable);
13998 		}
13999 	}
14000 }
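
// Sketch of the rare "swap" case handled above (names invented): if two phi nodes exchange
// values, writing the first one directly would clobber a value the second still needs, so the
// output roughly becomes
//   _a_copy = a;
//   a = b;
//   b = _a_copy;
// instead of the naive pair of assignments.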
14001 
14002 void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
14003 {
14004 	auto &to_block = get<SPIRBlock>(to);
14005 	if (from == to)
14006 		return;
14007 
14008 	assert(is_continue(to));
14009 	if (to_block.complex_continue)
14010 	{
14011 		// Just emit the whole block chain as is.
14012 		auto usage_counts = expression_usage_counts;
14013 
14014 		emit_block_chain(to_block);
14015 
14016 		// Expression usage counts are moot after returning from the continue block.
14017 		expression_usage_counts = usage_counts;
14018 	}
14019 	else
14020 	{
14021 		auto &from_block = get<SPIRBlock>(from);
14022 		bool outside_control_flow = false;
14023 		uint32_t loop_dominator = 0;
14024 
14025 		// FIXME: Refactor this to not use the old loop_dominator tracking.
14026 		if (from_block.merge_block)
14027 		{
14028 			// If we are a loop header, we don't set the loop dominator,
14029 			// so just use "self" here.
14030 			loop_dominator = from;
14031 		}
14032 		else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
14033 		{
14034 			loop_dominator = from_block.loop_dominator;
14035 		}
14036 
14037 		if (loop_dominator != 0)
14038 		{
14039 			auto &cfg = get_cfg_for_current_function();
14040 
14041 			// For non-complex continue blocks, we implicitly branch to the continue block
14042 			// by having the continue block be part of the loop header in for (; ; continue-block).
14043 			outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
14044 		}
14045 
14046 		// Some simplification for for-loops. We always end up with a useless continue;
14047 		// statement since we branch to a loop block.
14048 		// Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block,
14049 		// we can avoid writing out an explicit continue statement.
14050 		// Similar optimization to return statements if we know we're outside flow control.
14051 		if (!outside_control_flow)
14052 			statement("continue;");
14053 	}
14054 }
14055 
14056 void CompilerGLSL::branch(BlockID from, BlockID to)
14057 {
14058 	flush_phi(from, to);
14059 	flush_control_dependent_expressions(from);
14060 
14061 	bool to_is_continue = is_continue(to);
14062 
14063 	// This is only a continue if we branch to our loop dominator.
14064 	if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
14065 	{
14066 		// This can happen if we had a complex continue block which was emitted.
14067 		// Once the continue block tries to branch to the loop header, just emit continue;
14068 		// and end the chain here.
14069 		statement("continue;");
14070 	}
14071 	else if (from != to && is_break(to))
14072 	{
14073 		// We cannot break to ourselves, so check explicitly for from != to.
14074 		// This case can trigger if a single block is all three of these things at once:
14075 		// - Continue block
14076 		// - Loop header
14077 		// - Break merge target
14078 
14079 		// Very dirty workaround.
14080 		// Switch constructs are able to break, but they cannot break out of a loop at the same time.
14081 		// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
14082 		// write to the ladder here, and defer the break.
14083 		// The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
14084 		if (current_emitting_switch && is_loop_break(to) &&
14085 		    current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
14086 		    get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
14087 		{
14088 			if (!current_emitting_switch->need_ladder_break)
14089 			{
14090 				force_recompile();
14091 				current_emitting_switch->need_ladder_break = true;
14092 			}
14093 
14094 			statement("_", current_emitting_switch->self, "_ladder_break = true;");
14095 		}
14096 		statement("break;");
14097 	}
14098 	else if (to_is_continue || from == to)
14099 	{
14100 		// The from == to case can happen for a do-while loop which branches into itself.
14101 		// We don't mark these cases as continue blocks, but the only possible way to branch into
14102 		// ourselves is by means of continue blocks.
14103 
14104 		// If we are merging to a continue block, there is no need to emit the block chain for continue here.
14105 		// We can branch to the continue block after we merge execution.
14106 
14107 		// Here we make use of structured control flow rules from spec:
14108 		// 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
14109 		//       - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
14110 		// If we are branching to a merge block, we must be inside a construct which dominates the merge block.
14111 		auto &block_meta = ir.block_meta[to];
14112 		bool branching_to_merge =
14113 		    (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
14114 		                   ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
14115 		if (!to_is_continue || !branching_to_merge)
14116 			branch_to_continue(from, to);
14117 	}
14118 	else if (!is_conditional(to))
14119 		emit_block_chain(get<SPIRBlock>(to));
14120 
14121 	// It is important that we check for break before continue.
14122 	// A block might serve two purposes, a break block for the inner scope, and
14123 	// a continue block in the outer scope.
14124 	// Inner scope always takes precedence.
14125 }
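
// The ladder workaround above, sketched with an invented block ID: a switch nested in a loop that
// needs to break out of that loop declares "bool _N_ladder_break = false;" ahead of the switch,
// sets the flag to true right before the switch-level "break;", and code emitted after the switch
// is expected to test the flag to perform the actual loop-level break.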
14126 
14127 void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
14128 {
14129 	auto &from_block = get<SPIRBlock>(from);
14130 	BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
14131 
14132 	// If we branch directly to our selection merge target, we don't need a code path.
14133 	bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
14134 	bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);
14135 
14136 	if (!true_block_needs_code && !false_block_needs_code)
14137 		return;
14138 
14139 	// We might have a loop merge here. Only consider selection flattening constructs.
14140 	// Loop hints are handled explicitly elsewhere.
14141 	if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
14142 		emit_block_hints(from_block);
14143 
14144 	if (true_block_needs_code)
14145 	{
14146 		statement("if (", to_expression(cond), ")");
14147 		begin_scope();
14148 		branch(from, true_block);
14149 		end_scope();
14150 
14151 		if (false_block_needs_code)
14152 		{
14153 			statement("else");
14154 			begin_scope();
14155 			branch(from, false_block);
14156 			end_scope();
14157 		}
14158 	}
14159 	else if (false_block_needs_code)
14160 	{
14161 		// Only need false path, use negative conditional.
14162 		statement("if (!", to_enclosed_expression(cond), ")");
14163 		begin_scope();
14164 		branch(from, false_block);
14165 		end_scope();
14166 	}
14167 }
14168 
14169 // FIXME: This currently cannot handle complex continue blocks
14170 // as in do-while.
14171 // This should be seen as a "trivial" continue block.
14172 string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
14173 {
14174 	auto *block = &get<SPIRBlock>(continue_block);
14175 
14176 	// While emitting the continue block, declare_temporary will check this
14177 	// if we have to emit temporaries.
14178 	current_continue_block = block;
14179 
14180 	SmallVector<string> statements;
14181 
14182 	// Capture all statements into our list.
14183 	auto *old = redirect_statement;
14184 	redirect_statement = &statements;
14185 
14186 	// Stamp out all blocks one after each other.
14187 	while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
14188 	{
14189 		// Write out all instructions we have in this block.
14190 		emit_block_instructions(*block);
14191 
14192 		// For plain branchless for/while continue blocks.
14193 		if (block->next_block)
14194 		{
14195 			flush_phi(continue_block, block->next_block);
14196 			block = &get<SPIRBlock>(block->next_block);
14197 		}
14198 		// For do-while blocks, the last block will be a select block.
14199 		else if (block->true_block && follow_true_block)
14200 		{
14201 			flush_phi(continue_block, block->true_block);
14202 			block = &get<SPIRBlock>(block->true_block);
14203 		}
14204 		else if (block->false_block && follow_false_block)
14205 		{
14206 			flush_phi(continue_block, block->false_block);
14207 			block = &get<SPIRBlock>(block->false_block);
14208 		}
14209 		else
14210 		{
14211 			SPIRV_CROSS_THROW("Invalid continue block detected!");
14212 		}
14213 	}
14214 
14215 	// Restore old pointer.
14216 	redirect_statement = old;
14217 
14218 	// Somewhat ugly, strip off the last ';' since we use ',' instead.
14219 	// Ideally, we should select this behavior in statement().
14220 	for (auto &s : statements)
14221 	{
14222 		if (!s.empty() && s.back() == ';')
14223 			s.erase(s.size() - 1, 1);
14224 	}
14225 
14226 	current_continue_block = nullptr;
14227 	return merge(statements);
14228 }
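
// Sketch: if the continue block contains the statements "i++;" and "j += 2;", they are captured,
// their trailing semicolons stripped, and the result merged into "i++, j += 2" so the caller can
// splice it into the third clause of a for (...) header.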
14229 
14230 void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
14231 {
14232 	// While loops do not take initializers, so declare all of them outside.
14233 	for (auto &loop_var : block.loop_variables)
14234 	{
14235 		auto &var = get<SPIRVariable>(loop_var);
14236 		statement(variable_decl(var), ";");
14237 	}
14238 }
14239 
14240 string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
14241 {
14242 	if (block.loop_variables.empty())
14243 		return "";
14244 
14245 	bool same_types = for_loop_initializers_are_same_type(block);
14246 	// We can only declare for loop initializers if all variables are of same type.
14247 	// If we cannot do this, declare individual variables before the loop header.
14248 
14249 	// We might have a loop variable candidate which was not assigned to for some reason.
14250 	uint32_t missing_initializers = 0;
14251 	for (auto &variable : block.loop_variables)
14252 	{
14253 		uint32_t expr = get<SPIRVariable>(variable).static_expression;
14254 
14255 		// Sometimes loop variables are initialized with OpUndef, but we can just declare
14256 		// a plain variable without initializer in this case.
14257 		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
14258 			missing_initializers++;
14259 	}
14260 
14261 	if (block.loop_variables.size() == 1 && missing_initializers == 0)
14262 	{
14263 		return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
14264 	}
14265 	else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
14266 	{
14267 		for (auto &loop_var : block.loop_variables)
14268 			statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
14269 		return "";
14270 	}
14271 	else
14272 	{
14273 		// We have a mix of loop variables, either ones with a clear initializer, or ones without.
14274 		// Separate the two streams.
14275 		string expr;
14276 
14277 		for (auto &loop_var : block.loop_variables)
14278 		{
14279 			uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
14280 			if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
14281 			{
14282 				statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
14283 			}
14284 			else
14285 			{
14286 				auto &var = get<SPIRVariable>(loop_var);
14287 				auto &type = get_variable_data_type(var);
14288 				if (expr.empty())
14289 				{
14290 					// For loop initializers are of the form <type id = value, id = value, id = value, etc ...
14291 					expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
14292 				}
14293 				else
14294 				{
14295 					expr += ", ";
14296 					// In MSL, being based on C++, the asterisk marking a pointer
14297 					// binds to the identifier, not the type.
14298 					if (type.pointer)
14299 						expr += "* ";
14300 				}
14301 
14302 				expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
14303 			}
14304 		}
14305 		return expr;
14306 	}
14307 }
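
// Roughly: when every loop variable has the same type and a usable initializer, this returns a
// combined declaration such as "int i = 0, j = 0"; with mixed types (or no usable initializers)
// the variables are declared as separate statements before the loop and "" is returned.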
14308 
14309 bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
14310 {
14311 	if (block.loop_variables.size() <= 1)
14312 		return true;
14313 
14314 	uint32_t expected = 0;
14315 	Bitset expected_flags;
14316 	for (auto &var : block.loop_variables)
14317 	{
14318 		// Don't care about uninitialized variables as they will not be part of the initializers.
14319 		uint32_t expr = get<SPIRVariable>(var).static_expression;
14320 		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
14321 			continue;
14322 
14323 		if (expected == 0)
14324 		{
14325 			expected = get<SPIRVariable>(var).basetype;
14326 			expected_flags = get_decoration_bitset(var);
14327 		}
14328 		else if (expected != get<SPIRVariable>(var).basetype)
14329 			return false;
14330 
14331 		// Precision flags and things like that must also match.
14332 		if (expected_flags != get_decoration_bitset(var))
14333 			return false;
14334 	}
14335 
14336 	return true;
14337 }
14338 
14339 bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
14340 {
14341 	SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
14342 
14343 	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
14344 	{
14345 		uint32_t current_count = statement_count;
14346 		// If we're trying to create a true for loop,
14347 		// we need to make sure that all opcodes before branch statement do not actually emit any code.
14348 		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
14349 		emit_block_instructions(block);
14350 
14351 		bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
14352 
14353 		// This can work! We only did trivial things which could be forwarded in block body!
14354 		if (current_count == statement_count && condition_is_temporary)
14355 		{
14356 			switch (continue_type)
14357 			{
14358 			case SPIRBlock::ForLoop:
14359 			{
14360 				// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
14361 				flush_undeclared_variables(block);
14362 
14363 				// Important that we do this in this order because
14364 				// emitting the continue block can invalidate the condition expression.
14365 				auto initializer = emit_for_loop_initializers(block);
14366 				auto condition = to_expression(block.condition);
14367 
14368 				// Condition might have to be inverted.
14369 				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14370 					condition = join("!", enclose_expression(condition));
14371 
14372 				emit_block_hints(block);
14373 				if (method != SPIRBlock::MergeToSelectContinueForLoop)
14374 				{
14375 					auto continue_block = emit_continue_block(block.continue_block, false, false);
14376 					statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
14377 				}
14378 				else
14379 					statement("for (", initializer, "; ", condition, "; )");
14380 				break;
14381 			}
14382 
14383 			case SPIRBlock::WhileLoop:
14384 			{
14385 				// This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
14386 				flush_undeclared_variables(block);
14387 				emit_while_loop_initializers(block);
14388 				emit_block_hints(block);
14389 
14390 				auto condition = to_expression(block.condition);
14391 				// Condition might have to be inverted.
14392 				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14393 					condition = join("!", enclose_expression(condition));
14394 
14395 				statement("while (", condition, ")");
14396 				break;
14397 			}
14398 
14399 			default:
14400 				block.disable_block_optimization = true;
14401 				force_recompile();
14402 				begin_scope(); // We'll see an end_scope() later.
14403 				return false;
14404 			}
14405 
14406 			begin_scope();
14407 			return true;
14408 		}
14409 		else
14410 		{
14411 			block.disable_block_optimization = true;
14412 			force_recompile();
14413 			begin_scope(); // We'll see an end_scope() later.
14414 			return false;
14415 		}
14416 	}
14417 	else if (method == SPIRBlock::MergeToDirectForLoop)
14418 	{
14419 		auto &child = get<SPIRBlock>(block.next_block);
14420 
14421 		// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
14422 		flush_undeclared_variables(child);
14423 
14424 		uint32_t current_count = statement_count;
14425 
14426 		// If we're trying to create a true for loop,
14427 		// we need to make sure that all opcodes before branch statement do not actually emit any code.
14428 		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
14429 		emit_block_instructions(child);
14430 
14431 		bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
14432 
14433 		if (current_count == statement_count && condition_is_temporary)
14434 		{
14435 			uint32_t target_block = child.true_block;
14436 
14437 			switch (continue_type)
14438 			{
14439 			case SPIRBlock::ForLoop:
14440 			{
14441 				// Important that we do this in this order because
14442 				// emitting the continue block can invalidate the condition expression.
14443 				auto initializer = emit_for_loop_initializers(block);
14444 				auto condition = to_expression(child.condition);
14445 
14446 				// Condition might have to be inverted.
14447 				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
14448 				{
14449 					condition = join("!", enclose_expression(condition));
14450 					target_block = child.false_block;
14451 				}
14452 
14453 				auto continue_block = emit_continue_block(block.continue_block, false, false);
14454 				emit_block_hints(block);
14455 				statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
14456 				break;
14457 			}
14458 
14459 			case SPIRBlock::WhileLoop:
14460 			{
14461 				emit_while_loop_initializers(block);
14462 				emit_block_hints(block);
14463 
14464 				auto condition = to_expression(child.condition);
14465 				// Condition might have to be inverted.
14466 				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
14467 				{
14468 					condition = join("!", enclose_expression(condition));
14469 					target_block = child.false_block;
14470 				}
14471 
14472 				statement("while (", condition, ")");
14473 				break;
14474 			}
14475 
14476 			default:
14477 				block.disable_block_optimization = true;
14478 				force_recompile();
14479 				begin_scope(); // We'll see an end_scope() later.
14480 				return false;
14481 			}
14482 
14483 			begin_scope();
14484 			branch(child.self, target_block);
14485 			return true;
14486 		}
14487 		else
14488 		{
14489 			block.disable_block_optimization = true;
14490 			force_recompile();
14491 			begin_scope(); // We'll see an end_scope() later.
14492 			return false;
14493 		}
14494 	}
14495 	else
14496 		return false;
14497 }
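
// Illustrative outcome (identifiers invented): when the header block only produces forwardable
// expressions, the loop can be emitted as a classic header such as
//   for (int i = 0; i < count; i++)
// whereas any real side effect before the branch disables the optimization, forces a recompile
// pass, and the loop falls back to the generic for (;;) { ... } form with explicit breaks.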
14498 
14499 void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
14500 {
14501 	for (auto &v : block.dominated_variables)
14502 		flush_variable_declaration(v);
14503 }
14504 
14505 void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
14506 {
14507 	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
14508 	// Need to sort these to ensure that reference output is stable.
14509 	sort(begin(temporaries), end(temporaries),
14510 	     [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
14511 
14512 	for (auto &tmp : temporaries)
14513 	{
14514 		add_local_variable_name(tmp.second);
14515 		auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
14516 		auto &type = get<SPIRType>(tmp.first);
14517 
14518 		// Not all targets support pointer literals, so don't bother with that case.
14519 		string initializer;
14520 		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
14521 			initializer = join(" = ", to_zero_initialized_expression(tmp.first));
14522 
14523 		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");
14524 
14525 		hoisted_temporaries.insert(tmp.second);
14526 		forced_temporaries.insert(tmp.second);
14527 
14528 		// The temporary might be read from before it's assigned, so set up the expression now.
14529 		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
14530 	}
14531 }
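
// Sketch: a temporary whose ID is, say, 24 and whose type is float gets hoisted as roughly
//   float _24;
// (or "float _24 = 0.0;" with options.force_zero_initialized_variables) before the loop header,
// so both the loop body and the code after the loop can refer to it.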
14532 
14533 void CompilerGLSL::emit_block_chain(SPIRBlock &block)
14534 {
14535 	bool select_branch_to_true_block = false;
14536 	bool select_branch_to_false_block = false;
14537 	bool skip_direct_branch = false;
14538 	bool emitted_loop_header_variables = false;
14539 	bool force_complex_continue_block = false;
14540 	ValueSaver<uint32_t> loop_level_saver(current_loop_level);
14541 
14542 	if (block.merge == SPIRBlock::MergeLoop)
14543 		add_loop_level();
14544 
14545 	emit_hoisted_temporaries(block.declare_temporary);
14546 
14547 	SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
14548 	if (block.continue_block)
14549 	{
14550 		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
14551 		// If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
14552 		if (continue_type == SPIRBlock::ComplexLoop)
14553 			block.complex_continue = true;
14554 	}
14555 
14556 	// If we have loop variables, stop masking out access to the variable now.
14557 	for (auto var_id : block.loop_variables)
14558 	{
14559 		auto &var = get<SPIRVariable>(var_id);
14560 		var.loop_variable_enable = true;
14561 		// We're not going to declare the variable directly, so emit a copy here.
14562 		emit_variable_temporary_copies(var);
14563 	}
14564 
14565 	// Remember deferred declaration state. We will restore it before returning.
14566 	SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
14567 	for (size_t i = 0; i < block.dominated_variables.size(); i++)
14568 	{
14569 		uint32_t var_id = block.dominated_variables[i];
14570 		auto &var = get<SPIRVariable>(var_id);
14571 		rearm_dominated_variables[i] = var.deferred_declaration;
14572 	}
14573 
14574 	// This is the method often used by spirv-opt to implement loops.
14575 	// The loop header goes straight into the continue block.
14576 	// However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
14577 	// it *MUST* be used in the continue block. This loop method will not work.
14578 	if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
14579 	{
14580 		flush_undeclared_variables(block);
14581 		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
14582 		{
14583 			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14584 				select_branch_to_false_block = true;
14585 			else
14586 				select_branch_to_true_block = true;
14587 
14588 			emitted_loop_header_variables = true;
14589 			force_complex_continue_block = true;
14590 		}
14591 	}
14592 	// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
14593 	else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
14594 	{
14595 		flush_undeclared_variables(block);
14596 		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
14597 		{
14598 			// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
14599 			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14600 				select_branch_to_false_block = true;
14601 			else
14602 				select_branch_to_true_block = true;
14603 
14604 			emitted_loop_header_variables = true;
14605 		}
14606 	}
14607 	// This is the newer loop behavior in glslang which branches from Loop header directly to
14608 	// a new block, which in turn has a OpBranchSelection without a selection merge.
14609 	else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
14610 	{
14611 		flush_undeclared_variables(block);
14612 		if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
14613 		{
14614 			skip_direct_branch = true;
14615 			emitted_loop_header_variables = true;
14616 		}
14617 	}
14618 	else if (continue_type == SPIRBlock::DoWhileLoop)
14619 	{
14620 		flush_undeclared_variables(block);
14621 		emit_while_loop_initializers(block);
14622 		emitted_loop_header_variables = true;
14623 		// We have some temporaries where the loop header is the dominator.
14624 		// We risk a case where we have code like:
14625 		// for (;;) { create-temporary; break; } consume-temporary;
14626 		// so force-declare temporaries here.
14627 		emit_hoisted_temporaries(block.potential_declare_temporary);
14628 		statement("do");
14629 		begin_scope();
14630 
14631 		emit_block_instructions(block);
14632 	}
14633 	else if (block.merge == SPIRBlock::MergeLoop)
14634 	{
14635 		flush_undeclared_variables(block);
14636 		emit_while_loop_initializers(block);
14637 		emitted_loop_header_variables = true;
14638 
14639 		// We have a generic loop without any distinguishable pattern like for, while or do while.
14640 		get<SPIRBlock>(block.continue_block).complex_continue = true;
14641 		continue_type = SPIRBlock::ComplexLoop;
14642 
14643 		// We have some temporaries where the loop header is the dominator.
14644 		// We risk a case where we have code like:
14645 		// for (;;) { create-temporary; break; } consume-temporary;
14646 		// so force-declare temporaries here.
14647 		emit_hoisted_temporaries(block.potential_declare_temporary);
14648 		emit_block_hints(block);
14649 		statement("for (;;)");
14650 		begin_scope();
14651 
14652 		emit_block_instructions(block);
14653 	}
14654 	else
14655 	{
14656 		emit_block_instructions(block);
14657 	}
14658 
14659 	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
14660 	// as writes to said loop variables might have been masked out, we need a recompile.
14661 	if (!emitted_loop_header_variables && !block.loop_variables.empty())
14662 	{
14663 		force_recompile();
14664 		for (auto var : block.loop_variables)
14665 			get<SPIRVariable>(var).loop_variable = false;
14666 		block.loop_variables.clear();
14667 	}
14668 
14669 	flush_undeclared_variables(block);
14670 	bool emit_next_block = true;
14671 
14672 	// Handle end of block.
14673 	switch (block.terminator)
14674 	{
14675 	case SPIRBlock::Direct:
14676 		// True when emitting complex continue block.
14677 		if (block.loop_dominator == block.next_block)
14678 		{
14679 			branch(block.self, block.next_block);
14680 			emit_next_block = false;
14681 		}
14682 		// True if MergeToDirectForLoop succeeded.
14683 		else if (skip_direct_branch)
14684 			emit_next_block = false;
14685 		else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
14686 		{
14687 			branch(block.self, block.next_block);
14688 			emit_next_block = false;
14689 		}
14690 		break;
14691 
14692 	case SPIRBlock::Select:
14693 		// True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
14694 		if (select_branch_to_true_block)
14695 		{
14696 			if (force_complex_continue_block)
14697 			{
14698 				assert(block.true_block == block.continue_block);
14699 
14700 				// We're going to emit a continue block directly here, so make sure it's marked as complex.
14701 				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
14702 				bool old_complex = complex_continue;
14703 				complex_continue = true;
14704 				branch(block.self, block.true_block);
14705 				complex_continue = old_complex;
14706 			}
14707 			else
14708 				branch(block.self, block.true_block);
14709 		}
14710 		else if (select_branch_to_false_block)
14711 		{
14712 			if (force_complex_continue_block)
14713 			{
14714 				assert(block.false_block == block.continue_block);
14715 
14716 				// We're going to emit a continue block directly here, so make sure it's marked as complex.
14717 				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
14718 				bool old_complex = complex_continue;
14719 				complex_continue = true;
14720 				branch(block.self, block.false_block);
14721 				complex_continue = old_complex;
14722 			}
14723 			else
14724 				branch(block.self, block.false_block);
14725 		}
14726 		else
14727 			branch(block.self, block.condition, block.true_block, block.false_block);
14728 		break;
14729 
14730 	case SPIRBlock::MultiSelect:
14731 	{
14732 		auto &type = expression_type(block.condition);
14733 		bool unsigned_case =
14734 		    type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte;
14735 
14736 		if (block.merge == SPIRBlock::MergeNone)
14737 			SPIRV_CROSS_THROW("Switch statement is not structured");
14738 
14739 		if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
14740 		{
14741 			// SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
14742 			SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
14743 		}
14744 
14745 		const char *label_suffix = "";
14746 		if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
14747 			label_suffix = "u";
14748 		else if (type.basetype == SPIRType::UShort)
14749 			label_suffix = backend.uint16_t_literal_suffix;
14750 		else if (type.basetype == SPIRType::Short)
14751 			label_suffix = backend.int16_t_literal_suffix;
14752 
14753 		SPIRBlock *old_emitting_switch = current_emitting_switch;
14754 		current_emitting_switch = &block;
14755 
14756 		if (block.need_ladder_break)
14757 			statement("bool _", block.self, "_ladder_break = false;");
14758 
14759 		// Find all unique case constructs.
14760 		unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs;
14761 		SmallVector<uint32_t> block_declaration_order;
14762 		SmallVector<uint32_t> literals_to_merge;
14763 
14764 		// If a switch case branches to the default block for some reason, we can just remove that literal from consideration
14765 		// and let the default: block handle it.
14766 		// Section 2.11 of the SPIR-V spec states that fall-through cases have a very strict declaration order which we can take advantage of here.
14767 		// We only need to consider possible fallthrough if order[i] branches to order[i + 1].
14768 		for (auto &c : block.cases)
14769 		{
14770 			if (c.block != block.next_block && c.block != block.default_block)
14771 			{
14772 				if (!case_constructs.count(c.block))
14773 					block_declaration_order.push_back(c.block);
14774 				case_constructs[c.block].push_back(c.value);
14775 			}
14776 			else if (c.block == block.next_block && block.default_block != block.next_block)
14777 			{
14778 				// We might have to flush phi inside specific case labels.
14779 				// If we can piggyback on default:, do so instead.
14780 				literals_to_merge.push_back(c.value);
14781 			}
14782 		}
14783 
14784 		// Empty literal array -> default.
14785 		if (block.default_block != block.next_block)
14786 		{
14787 			auto &default_block = get<SPIRBlock>(block.default_block);
14788 
14789 			// We need to slide in the default block somewhere in this chain
14790 			// if there are fall-through scenarios since the default is declared separately in OpSwitch.
14791 			// Only consider trivial fall-through cases here.
14792 			size_t num_blocks = block_declaration_order.size();
14793 			bool injected_block = false;
14794 
14795 			for (size_t i = 0; i < num_blocks; i++)
14796 			{
14797 				auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
14798 				if (execution_is_direct_branch(case_block, default_block))
14799 				{
14800 					// Fallthrough to default block, we must inject the default block here.
14801 					block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
14802 					injected_block = true;
14803 					break;
14804 				}
14805 				else if (execution_is_direct_branch(default_block, case_block))
14806 				{
14807 					// Default case is falling through to another case label, we must inject the default block here.
14808 					block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
14809 					injected_block = true;
14810 					break;
14811 				}
14812 			}
14813 
14814 			// Order does not matter.
14815 			if (!injected_block)
14816 				block_declaration_order.push_back(block.default_block);
14817 			else if (is_legacy_es())
14818 				SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
14819 
14820 			case_constructs[block.default_block] = {};
14821 		}
14822 
14823 		size_t num_blocks = block_declaration_order.size();
14824 
14825 		const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string {
14826 			return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal));
14827 		};
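		// Illustrative only: to_case_label(0xFFFFFFFFu, true) returns "4294967295", while
		// to_case_label(0xFFFFFFFFu, false) reinterprets the literal as signed and returns "-1".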
14828 
14829 		const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint32_t> &labels,
14830 		                                      const char *suffix) -> string {
14831 			string ret;
14832 			size_t count = labels.size();
14833 			for (size_t i = 0; i < count; i++)
14834 			{
14835 				if (i)
14836 					ret += " || ";
14837 				ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
14838 				            count > 1 ? ")" : "");
14839 			}
14840 			return ret;
14841 		};
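		// Illustrative only (hypothetical names): for a selector expression "c", labels { 1, 2 } and
		// suffix "u", the lambda above yields "(c == 1u) || (c == 2u)"; a single label yields "c == 1u".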
14842 
14843 		// We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
14844 		// we need to flush phi nodes outside the switch block in a branch,
14845 		// and skip any Phi handling inside the case label to make fall-through work as expected.
14846 		// This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
14847 		// inside the case label if at all possible.
14848 		for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
14849 		{
14850 			if (flush_phi_required(block.self, block_declaration_order[i]) &&
14851 			    flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
14852 			{
14853 				uint32_t target_block = block_declaration_order[i];
14854 
14855 				// Make sure we flush Phi, it might have been marked to be ignored earlier.
14856 				get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
14857 
14858 				auto &literals = case_constructs[target_block];
14859 
14860 				if (literals.empty())
14861 				{
14862 					// Oh boy, gotta make a complete negative test instead! o.o
14863 					// Find all possible literals that would *not* make us enter the default block.
14864 					// If none of those literals match, we flush Phi ...
14865 					SmallVector<string> conditions;
14866 					for (size_t j = 0; j < num_blocks; j++)
14867 					{
14868 						auto &negative_literals = case_constructs[block_declaration_order[j]];
14869 						for (auto &case_label : negative_literals)
14870 							conditions.push_back(join(to_enclosed_expression(block.condition),
14871 							                          " != ", to_case_label(case_label, unsigned_case)));
14872 					}
14873 
14874 					statement("if (", merge(conditions, " && "), ")");
14875 					begin_scope();
14876 					flush_phi(block.self, target_block);
14877 					end_scope();
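					// Illustrative only (hypothetical selector "c"): if the non-default labels are 1 and 2,
					// the code above emits "if (c != 1 && c != 2) { <phi copies> }" before the switch is opened.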
14878 				}
14879 				else
14880 				{
14881 					SmallVector<string> conditions;
14882 					conditions.reserve(literals.size());
14883 					for (auto &case_label : literals)
14884 						conditions.push_back(join(to_enclosed_expression(block.condition),
14885 						                          " == ", to_case_label(case_label, unsigned_case)));
14886 					statement("if (", merge(conditions, " || "), ")");
14887 					begin_scope();
14888 					flush_phi(block.self, target_block);
14889 					end_scope();
14890 				}
14891 
14892 				// Mark the block so that we don't flush Phi from header to case label.
14893 				get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
14894 			}
14895 		}
14896 
14897 		// If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
14898 		// non-structured exits with the help of a switch block.
14899 		// This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
14900 		bool degenerate_switch = block.default_block != block.merge_block && block.cases.empty();
14901 
14902 		if (degenerate_switch || is_legacy_es())
14903 		{
14904 			// ESSL 1.0 is not guaranteed to support do/while.
14905 			if (is_legacy_es())
14906 			{
14907 				uint32_t counter = statement_count;
14908 				statement("for (int spvDummy", counter, " = 0; spvDummy", counter,
14909 				          " < 1; spvDummy", counter, "++)");
14910 			}
14911 			else
14912 				statement("do");
14913 		}
14914 		else
14915 		{
14916 			emit_block_hints(block);
14917 			statement("switch (", to_expression(block.condition), ")");
14918 		}
14919 		begin_scope();
14920 
14921 		for (size_t i = 0; i < num_blocks; i++)
14922 		{
14923 			uint32_t target_block = block_declaration_order[i];
14924 			auto &literals = case_constructs[target_block];
14925 
14926 			if (literals.empty())
14927 			{
14928 				// Default case.
14929 				if (!degenerate_switch)
14930 				{
14931 					if (is_legacy_es())
14932 						statement("else");
14933 					else
14934 						statement("default:");
14935 				}
14936 			}
14937 			else
14938 			{
14939 				if (is_legacy_es())
14940 				{
14941 					statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
14942 					          ")");
14943 				}
14944 				else
14945 				{
14946 					for (auto &case_literal : literals)
14947 					{
14948 						// The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
14949 						statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
14950 					}
14951 				}
14952 			}
14953 
14954 			auto &case_block = get<SPIRBlock>(target_block);
14955 			if (backend.support_case_fallthrough && i + 1 < num_blocks &&
14956 			    execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
14957 			{
14958 				// We will fall through here, so just terminate the block chain early.
14959 				// We still need to deal with Phi potentially.
14960 				// No need for a stack-like thing here since we only do fall-through when there is a
14961 			// single trivial branch to the fall-through target.
14962 				current_emitting_switch_fallthrough = true;
14963 			}
14964 			else
14965 				current_emitting_switch_fallthrough = false;
14966 
14967 			if (!degenerate_switch)
14968 				begin_scope();
14969 			branch(block.self, target_block);
14970 			if (!degenerate_switch)
14971 				end_scope();
14972 
14973 			current_emitting_switch_fallthrough = false;
14974 		}
14975 
14976 		// Might still have to flush phi variables if we branch from loop header directly to merge target.
14977 		if (flush_phi_required(block.self, block.next_block))
14978 		{
14979 			if (block.default_block == block.next_block || !literals_to_merge.empty())
14980 			{
14981 				for (auto &case_literal : literals_to_merge)
14982 					statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
14983 
14984 				if (block.default_block == block.next_block)
14985 				{
14986 					if (is_legacy_es())
14987 						statement("else");
14988 					else
14989 						statement("default:");
14990 				}
14991 
14992 				begin_scope();
14993 				flush_phi(block.self, block.next_block);
14994 				statement("break;");
14995 				end_scope();
14996 			}
14997 		}
14998 
14999 		if (degenerate_switch && !is_legacy_es())
15000 			end_scope_decl("while(false)");
15001 		else
15002 			end_scope();
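		// Illustrative only: depending on the path taken above, the emitted construct is either a plain
		// "switch (expr) { ... }", a "do { ... } while(false)" for the degenerate case, or a single-iteration
		// "for (int spvDummyN = 0; spvDummyN < 1; spvDummyN++) { ... }" on ESSL 1.0.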
15003 
15004 		if (block.need_ladder_break)
15005 		{
15006 			statement("if (_", block.self, "_ladder_break)");
15007 			begin_scope();
15008 			statement("break;");
15009 			end_scope();
15010 		}
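		// Illustrative only (assuming block.self is e.g. 15): together with the declaration emitted before
		// the switch, the ladder-break pattern looks roughly like:
		//   bool _15_ladder_break = false;
		//   switch (...) { ... }
		//   if (_15_ladder_break) { break; }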
15011 
15012 		current_emitting_switch = old_emitting_switch;
15013 		break;
15014 	}
15015 
15016 	case SPIRBlock::Return:
15017 	{
15018 		for (auto &line : current_function->fixup_hooks_out)
15019 			line();
15020 
15021 		if (processing_entry_point)
15022 			emit_fixup();
15023 
15024 		auto &cfg = get_cfg_for_current_function();
15025 
15026 		if (block.return_value)
15027 		{
15028 			auto &type = expression_type(block.return_value);
15029 			if (!type.array.empty() && !backend.can_return_array)
15030 			{
15031 				// If we cannot return arrays, we will have a special out argument we can write to instead.
15032 				// The backend is responsible for setting this up, and for redirecting the return values as appropriate.
15033 				if (ir.ids[block.return_value].get_type() != TypeUndef)
15034 				{
15035 					emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
15036 					                get_expression_effective_storage_class(block.return_value));
15037 				}
15038 
15039 				if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
15040 				    block.loop_dominator != BlockID(SPIRBlock::NoDominator))
15041 				{
15042 					statement("return;");
15043 				}
15044 			}
15045 			else
15046 			{
15047 				// OpReturnValue can return Undef, so don't emit anything for this case.
15048 				if (ir.ids[block.return_value].get_type() != TypeUndef)
15049 					statement("return ", to_expression(block.return_value), ";");
15050 			}
15051 		}
15052 		else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
15053 		         block.loop_dominator != BlockID(SPIRBlock::NoDominator))
15054 		{
15055 			// If this block is the very final block and not called from control flow,
15056 			// we do not need an explicit return which looks out of place. Just end the function here.
15057 			// In the very weird case of for(;;) { return; } the return executes unconditionally,
15058 			// but since the block sits inside a loop we still need to emit an explicit return here ...
15059 			statement("return;");
15060 		}
15061 		break;
15062 	}
15063 
15064 	case SPIRBlock::Kill:
15065 		statement(backend.discard_literal, ";");
15066 		break;
15067 
15068 	case SPIRBlock::Unreachable:
15069 		emit_next_block = false;
15070 		break;
15071 
15072 	case SPIRBlock::IgnoreIntersection:
15073 		statement("ignoreIntersectionEXT;");
15074 		break;
15075 
15076 	case SPIRBlock::TerminateRay:
15077 		statement("terminateRayEXT;");
15078 		break;
15079 
15080 	default:
15081 		SPIRV_CROSS_THROW("Unimplemented block terminator.");
15082 	}
15083 
15084 	if (block.next_block && emit_next_block)
15085 	{
15086 		// If we hit this case, we're dealing with an unconditional branch, which means we will output
15087 		// that block after this. If we had selection merge, we already flushed phi variables.
15088 		if (block.merge != SPIRBlock::MergeSelection)
15089 		{
15090 			flush_phi(block.self, block.next_block);
15091 			// For a direct branch, need to remember to invalidate expressions in the next linear block instead.
15092 			get<SPIRBlock>(block.next_block).invalidate_expressions = block.invalidate_expressions;
15093 		}
15094 
15095 		// For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
15096 		if (!current_emitting_switch_fallthrough)
15097 		{
15098 			// For merge selects we might have ignored the fact that a merge target
15099 			// could have been a break; or continue;
15100 			// We will need to deal with it here.
15101 			if (is_loop_break(block.next_block))
15102 			{
15103 				// Cannot check for just break, because switch statements will also use break.
15104 				assert(block.merge == SPIRBlock::MergeSelection);
15105 				statement("break;");
15106 			}
15107 			else if (is_continue(block.next_block))
15108 			{
15109 				assert(block.merge == SPIRBlock::MergeSelection);
15110 				branch_to_continue(block.self, block.next_block);
15111 			}
15112 			else if (BlockID(block.self) != block.next_block)
15113 				emit_block_chain(get<SPIRBlock>(block.next_block));
15114 		}
15115 	}
15116 
15117 	if (block.merge == SPIRBlock::MergeLoop)
15118 	{
15119 		if (continue_type == SPIRBlock::DoWhileLoop)
15120 		{
15121 			// Make sure that we run the continue block to get the expressions set, but this
15122 			// should become an empty string.
15123 			// We have no fallbacks if we cannot forward everything to temporaries ...
15124 			const auto &continue_block = get<SPIRBlock>(block.continue_block);
15125 			bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
15126 			                                       get<SPIRBlock>(continue_block.loop_dominator));
15127 
15128 			uint32_t current_count = statement_count;
15129 			auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
15130 			if (statement_count != current_count)
15131 			{
15132 				// The DoWhile block has side effects, force ComplexLoop pattern next pass.
15133 				get<SPIRBlock>(block.continue_block).complex_continue = true;
15134 				force_recompile();
15135 			}
15136 
15137 			// Might have to invert the do-while test here.
15138 			auto condition = to_expression(continue_block.condition);
15139 			if (!positive_test)
15140 				condition = join("!", enclose_expression(condition));
15141 
15142 			end_scope_decl(join("while (", condition, ")"));
15143 		}
15144 		else
15145 			end_scope();
15146 
15147 		loop_level_saver.release();
15148 
15149 		// We cannot break out of two loops at once, so don't check for break; here.
15150 		// Using block.self as the "from" block isn't quite right, but it has the same scope
15151 		// and dominance structure, so it's fine.
15152 		if (is_continue(block.merge_block))
15153 			branch_to_continue(block.self, block.merge_block);
15154 		else
15155 			emit_block_chain(get<SPIRBlock>(block.merge_block));
15156 	}
15157 
15158 	// Forget about control dependent expressions now.
15159 	block.invalidate_expressions.clear();
15160 
15161 	// After we return, we must be out of scope, so if we somehow have to re-emit this function,
15162 	// re-declare variables if necessary.
15163 	assert(rearm_dominated_variables.size() == block.dominated_variables.size());
15164 	for (size_t i = 0; i < block.dominated_variables.size(); i++)
15165 	{
15166 		uint32_t var = block.dominated_variables[i];
15167 		get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
15168 	}
15169 
15170 	// Just like for deferred declaration, we need to forget about loop variable enable
15171 	// if our block chain is reinstantiated later.
15172 	for (auto &var_id : block.loop_variables)
15173 		get<SPIRVariable>(var_id).loop_variable_enable = false;
15174 }
15175 
15176 void CompilerGLSL::begin_scope()
15177 {
15178 	statement("{");
15179 	indent++;
15180 }
15181 
15182 void CompilerGLSL::end_scope()
15183 {
15184 	if (!indent)
15185 		SPIRV_CROSS_THROW("Popping empty indent stack.");
15186 	indent--;
15187 	statement("}");
15188 }
15189 
15190 void CompilerGLSL::end_scope(const string &trailer)
15191 {
15192 	if (!indent)
15193 		SPIRV_CROSS_THROW("Popping empty indent stack.");
15194 	indent--;
15195 	statement("}", trailer);
15196 }
15197 
15198 void CompilerGLSL::end_scope_decl()
15199 {
15200 	if (!indent)
15201 		SPIRV_CROSS_THROW("Popping empty indent stack.");
15202 	indent--;
15203 	statement("};");
15204 }
15205 
15206 void CompilerGLSL::end_scope_decl(const string &decl)
15207 {
15208 	if (!indent)
15209 		SPIRV_CROSS_THROW("Popping empty indent stack.");
15210 	indent--;
15211 	statement("} ", decl, ";");
15212 }
15213 
15214 void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
15215 {
15216 	// If our variable is remapped, and we rely on type-remapping information as
15217 	// well, then we cannot pass the variable as a function parameter.
15218 	// Fixing this is non-trivial without stamping out variants of the same function,
15219 	// so for now warn about this and suggest workarounds instead.
15220 	for (uint32_t i = 0; i < length; i++)
15221 	{
15222 		auto *var = maybe_get<SPIRVariable>(args[i]);
15223 		if (!var || !var->remapped_variable)
15224 			continue;
15225 
15226 		auto &type = get<SPIRType>(var->basetype);
15227 		if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
15228 		{
15229 			SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
15230 			                  "This will not work correctly because type-remapping information is lost. "
15231 			                  "To workaround, please consider not passing the subpass input as a function parameter, "
15232 			                  "or use in/out variables instead which do not need type remapping information.");
15233 		}
15234 	}
15235 }
15236 
15237 const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
15238 {
15239 	// FIXME: This is kind of hacky. There should be a cleaner way.
15240 	auto offset = uint32_t(&instr - current_emitting_block->ops.data());
15241 	if ((offset + 1) < current_emitting_block->ops.size())
15242 		return &current_emitting_block->ops[offset + 1];
15243 	else
15244 		return nullptr;
15245 }
15246 
15247 uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
15248 {
15249 	return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
15250 	                    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
15251 	                    MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
15252 }
15253 
15254 void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass)
15255 {
15256 	statement(lhs, " = ", to_expression(rhs_id), ";");
15257 }
15258 
15259 bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
15260 {
15261 	if (!backend.force_gl_in_out_block)
15262 		return false;
15263 	// This path is only relevant for GL backends.
15264 
15265 	auto *var = maybe_get<SPIRVariable>(target_id);
15266 	if (!var || var->storage != StorageClassOutput)
15267 		return false;
15268 
15269 	if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask)
15270 		return false;
15271 
15272 	auto &type = expression_type(source_id);
15273 	string array_expr;
15274 	if (type.array_size_literal.back())
15275 	{
15276 		array_expr = convert_to_string(type.array.back());
15277 		if (type.array.back() == 0)
15278 			SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
15279 	}
15280 	else
15281 		array_expr = to_expression(type.array.back());
15282 
15283 	SPIRType target_type;
15284 	target_type.basetype = SPIRType::Int;
15285 
15286 	statement("for (int i = 0; i < int(", array_expr, "); i++)");
15287 	begin_scope();
15288 	statement(to_expression(target_id), "[i] = ",
15289 	          bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")),
15290 	          ";");
15291 	end_scope();
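	// Illustrative only (hypothetical source name, assuming the builtin resolves to gl_SampleMask):
	// for a 1-element uint sample-mask array this emits roughly:
	//   for (int i = 0; i < int(1); i++) { gl_SampleMask[i] = int(_src[i]); }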
15292 
15293 	return true;
15294 }
15295 
15296 void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
15297 {
15298 	if (!backend.force_gl_in_out_block)
15299 		return;
15300 	// This path is only relevant for GL backends.
15301 
15302 	auto *var = maybe_get<SPIRVariable>(source_id);
15303 	if (!var)
15304 		return;
15305 
15306 	if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
15307 		return;
15308 
15309 	auto &type = get_variable_data_type(*var);
15310 	if (type.array.empty())
15311 		return;
15312 
15313 	auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
15314 	bool is_builtin = is_builtin_variable(*var) &&
15315 	                  (builtin == BuiltInPointSize ||
15316 	                   builtin == BuiltInPosition ||
15317 	                   builtin == BuiltInSampleMask);
15318 	bool is_tess = is_tessellation_shader();
15319 	bool is_patch = has_decoration(var->self, DecorationPatch);
15320 	bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;
15321 
15322 	// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from them.
15323 	// We must unroll the array load.
15324 	// For builtins, we couldn't catch this case normally,
15325 	// because this is resolved in the OpAccessChain in most cases.
15326 	// If we load the entire array, we have no choice but to unroll here.
15327 	if (!is_patch && (is_builtin || is_tess))
15328 	{
15329 		auto new_expr = join("_", target_id, "_unrolled");
15330 		statement(variable_decl(type, new_expr, target_id), ";");
15331 		string array_expr;
15332 		if (type.array_size_literal.back())
15333 		{
15334 			array_expr = convert_to_string(type.array.back());
15335 			if (type.array.back() == 0)
15336 				SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
15337 		}
15338 		else
15339 			array_expr = to_expression(type.array.back());
15340 
15341 		// The array size might be a specialization constant, so use a for-loop instead.
15342 		statement("for (int i = 0; i < int(", array_expr, "); i++)");
15343 		begin_scope();
15344 		if (is_builtin && !is_sample_mask)
15345 			statement(new_expr, "[i] = gl_in[i].", expr, ";");
15346 		else if (is_sample_mask)
15347 		{
15348 			SPIRType target_type;
15349 			target_type.basetype = SPIRType::Int;
15350 			statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";");
15351 		}
15352 		else
15353 			statement(new_expr, "[i] = ", expr, "[i];");
15354 		end_scope();
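		// Illustrative only (hypothetical IDs and sizes): loading gl_Position for a tessellation patch
		// might expand to roughly:
		//   vec4 _37_unrolled[32];
		//   for (int i = 0; i < int(32); i++) { _37_unrolled[i] = gl_in[i].gl_Position; }
		// after which the loaded expression is redirected to _37_unrolled below.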
15355 
15356 		expr = move(new_expr);
15357 	}
15358 }
15359 
15360 void CompilerGLSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
15361 {
15362 	// We will handle array cases elsewhere.
15363 	if (!expr_type.array.empty())
15364 		return;
15365 
15366 	auto *var = maybe_get_backing_variable(source_id);
15367 	if (var)
15368 		source_id = var->self;
15369 
15370 	// Only interested in standalone builtin variables.
15371 	if (!has_decoration(source_id, DecorationBuiltIn))
15372 		return;
15373 
15374 	auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
15375 	auto expected_type = expr_type.basetype;
15376 
15377 	// TODO: Fill in for more builtins.
15378 	switch (builtin)
15379 	{
15380 	case BuiltInLayer:
15381 	case BuiltInPrimitiveId:
15382 	case BuiltInViewportIndex:
15383 	case BuiltInInstanceId:
15384 	case BuiltInInstanceIndex:
15385 	case BuiltInVertexId:
15386 	case BuiltInVertexIndex:
15387 	case BuiltInSampleId:
15388 	case BuiltInBaseVertex:
15389 	case BuiltInBaseInstance:
15390 	case BuiltInDrawIndex:
15391 	case BuiltInFragStencilRefEXT:
15392 	case BuiltInInstanceCustomIndexNV:
15393 	case BuiltInSampleMask:
15394 	case BuiltInPrimitiveShadingRateKHR:
15395 	case BuiltInShadingRateKHR:
15396 		expected_type = SPIRType::Int;
15397 		break;
15398 
15399 	case BuiltInGlobalInvocationId:
15400 	case BuiltInLocalInvocationId:
15401 	case BuiltInWorkgroupId:
15402 	case BuiltInLocalInvocationIndex:
15403 	case BuiltInWorkgroupSize:
15404 	case BuiltInNumWorkgroups:
15405 	case BuiltInIncomingRayFlagsNV:
15406 	case BuiltInLaunchIdNV:
15407 	case BuiltInLaunchSizeNV:
15408 		expected_type = SPIRType::UInt;
15409 		break;
15410 
15411 	default:
15412 		break;
15413 	}
15414 
15415 	if (expected_type != expr_type.basetype)
15416 		expr = bitcast_expression(expr_type, expected_type, expr);
15417 }
15418 
15419 void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
15420 {
15421 	auto *var = maybe_get_backing_variable(target_id);
15422 	if (var)
15423 		target_id = var->self;
15424 
15425 	// Only interested in standalone builtin variables.
15426 	if (!has_decoration(target_id, DecorationBuiltIn))
15427 		return;
15428 
15429 	auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
15430 	auto expected_type = expr_type.basetype;
15431 
15432 	// TODO: Fill in for more builtins.
15433 	switch (builtin)
15434 	{
15435 	case BuiltInLayer:
15436 	case BuiltInPrimitiveId:
15437 	case BuiltInViewportIndex:
15438 	case BuiltInFragStencilRefEXT:
15439 	case BuiltInSampleMask:
15440 	case BuiltInPrimitiveShadingRateKHR:
15441 	case BuiltInShadingRateKHR:
15442 		expected_type = SPIRType::Int;
15443 		break;
15444 
15445 	default:
15446 		break;
15447 	}
15448 
15449 	if (expected_type != expr_type.basetype)
15450 	{
15451 		auto type = expr_type;
15452 		type.basetype = expected_type;
15453 		expr = bitcast_expression(type, expr_type.basetype, expr);
15454 	}
15455 }
15456 
15457 void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
15458 {
15459 	if (*backend.nonuniform_qualifier == '\0')
15460 		return;
15461 
15462 	auto *var = maybe_get_backing_variable(ptr_id);
15463 	if (!var)
15464 		return;
15465 
15466 	if (var->storage != StorageClassUniformConstant &&
15467 	    var->storage != StorageClassStorageBuffer &&
15468 	    var->storage != StorageClassUniform)
15469 		return;
15470 
15471 	auto &backing_type = get<SPIRType>(var->basetype);
15472 	if (backing_type.array.empty())
15473 		return;
15474 
15475 	// If we get here, we know we're accessing an arrayed resource which
15476 	// might require nonuniform qualifier.
15477 
15478 	auto start_array_index = expr.find_first_of('[');
15479 
15480 	if (start_array_index == string::npos)
15481 		return;
15482 
15483 	// We've opened a bracket, track expressions until we can close the bracket.
15484 	// This must be our resource index.
15485 	size_t end_array_index = string::npos;
15486 	unsigned bracket_count = 1;
15487 	for (size_t index = start_array_index + 1; index < expr.size(); index++)
15488 	{
15489 		if (expr[index] == ']')
15490 		{
15491 			if (--bracket_count == 0)
15492 			{
15493 				end_array_index = index;
15494 				break;
15495 			}
15496 		}
15497 		else if (expr[index] == '[')
15498 			bracket_count++;
15499 	}
15500 
15501 	assert(bracket_count == 0);
15502 
15503 	// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
15504 	// nothing we can do here to express that.
15505 	if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
15506 		return;
15507 
15508 	start_array_index++;
15509 
15510 	expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
15511 	            expr.substr(start_array_index, end_array_index - start_array_index), ")",
15512 	            expr.substr(end_array_index, string::npos));
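	// Illustrative only (hypothetical names): with nonuniform_qualifier set to "nonuniformEXT",
	// an expression like "_buffers[index].value" is rewritten to "_buffers[nonuniformEXT(index)].value".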
15513 }
15514 
15515 void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
15516 {
15517 	if ((options.es && options.version < 310) || (!options.es && options.version < 140))
15518 		return;
15519 
15520 	switch (block.hint)
15521 	{
15522 	case SPIRBlock::HintFlatten:
15523 		require_extension_internal("GL_EXT_control_flow_attributes");
15524 		statement("SPIRV_CROSS_FLATTEN");
15525 		break;
15526 	case SPIRBlock::HintDontFlatten:
15527 		require_extension_internal("GL_EXT_control_flow_attributes");
15528 		statement("SPIRV_CROSS_BRANCH");
15529 		break;
15530 	case SPIRBlock::HintUnroll:
15531 		require_extension_internal("GL_EXT_control_flow_attributes");
15532 		statement("SPIRV_CROSS_UNROLL");
15533 		break;
15534 	case SPIRBlock::HintDontUnroll:
15535 		require_extension_internal("GL_EXT_control_flow_attributes");
15536 		statement("SPIRV_CROSS_LOOP");
15537 		break;
15538 	default:
15539 		break;
15540 	}
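	// The SPIRV_CROSS_* hint macros are assumed to be defined in the emitted preamble elsewhere,
	// expanding to the corresponding GL_EXT_control_flow_attributes attributes when the extension is
	// available and to nothing otherwise.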
15541 }
15542 
15543 void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
15544 {
15545 	preserved_aliases[id] = get_name(id);
15546 }
15547 
15548 void CompilerGLSL::reset_name_caches()
15549 {
15550 	for (auto &preserved : preserved_aliases)
15551 		set_name(preserved.first, preserved.second);
15552 
15553 	preserved_aliases.clear();
15554 	resource_names.clear();
15555 	block_input_names.clear();
15556 	block_output_names.clear();
15557 	block_ubo_names.clear();
15558 	block_ssbo_names.clear();
15559 	block_names.clear();
15560 	function_overloads.clear();
15561 }
15562 
15563 void CompilerGLSL::fixup_type_alias()
15564 {
15565 	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
15566 	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
15567 		if (!type.type_alias)
15568 			return;
15569 
15570 		if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
15571 		{
15572 			// Top-level block types should never alias anything else.
15573 			type.type_alias = 0;
15574 		}
15575 		else if (type_is_block_like(type) && type.self == ID(self))
15576 		{
15577 			// A block-like type is any type which contains Offset decoration, but not top-level blocks,
15578 			// i.e. blocks which are placed inside buffers.
15579 			// Become the master.
15580 			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
15581 				if (other_id == self)
15582 					return;
15583 
15584 				if (other_type.type_alias == type.type_alias)
15585 					other_type.type_alias = self;
15586 			});
15587 
15588 			this->get<SPIRType>(type.type_alias).type_alias = self;
15589 			type.type_alias = 0;
15590 		}
15591 	});
15592 }
15593 
15594 void CompilerGLSL::reorder_type_alias()
15595 {
15596 	// Reorder declaration of types so that the master of the type alias is always emitted first.
15597 	// We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type ABuffer, which
15598 	// means the declaration of A doesn't happen (yet), and the order would be B, ABuffer instead of ABuffer, B. Fix this up here.
15599 	auto loop_lock = ir.create_loop_hard_lock();
15600 
15601 	auto &type_ids = ir.ids_for_type[TypeType];
15602 	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
15603 	{
15604 		auto &type = get<SPIRType>(*alias_itr);
15605 		if (type.type_alias != TypeID(0) &&
15606 		    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
15607 		{
15608 			// We will skip declaring this type, so make sure the type_alias type comes before.
15609 			auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
15610 			assert(master_itr != end(type_ids));
15611 
15612 			if (alias_itr < master_itr)
15613 			{
15614 				// Must also swap the type order for the constant-type joined array.
15615 				auto &joined_types = ir.ids_for_constant_or_type;
15616 				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
15617 				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
15618 				assert(alt_alias_itr != end(joined_types));
15619 				assert(alt_master_itr != end(joined_types));
15620 
15621 				swap(*alias_itr, *master_itr);
15622 				swap(*alt_alias_itr, *alt_master_itr);
15623 			}
15624 		}
15625 	}
15626 }
15627 
15628 void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
15629 {
15630 	// If we are redirecting statements, ignore the line directive.
15631 	// Common case here is continue blocks.
15632 	if (redirect_statement)
15633 		return;
15634 
15635 	if (options.emit_line_directives)
15636 	{
15637 		require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
15638 		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
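		// Illustrative only (hypothetical file name and line): emits e.g. #line 42 "shader.frag".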
15639 	}
15640 }
15641 
15642 void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
15643                                           SmallVector<uint32_t> chain)
15644 {
15645 	// Fully unroll all member/array indices one by one.
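	// Illustrative only: copying a value of type struct { float a; vec2 b[2]; } recurses into the index
	// chains {0}, {1, 0} and {1, 1}, each of which ends up in emit_store_statement below.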
15646 
15647 	auto &lhs_type = get<SPIRType>(lhs_type_id);
15648 	auto &rhs_type = get<SPIRType>(rhs_type_id);
15649 
15650 	if (!lhs_type.array.empty())
15651 	{
15652 		// Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
15653 		// and this is a rather obscure opcode anyway, so keep it simple unless we are forced to.
15654 		uint32_t array_size = to_array_size_literal(lhs_type);
15655 		chain.push_back(0);
15656 
15657 		for (uint32_t i = 0; i < array_size; i++)
15658 		{
15659 			chain.back() = i;
15660 			emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
15661 		}
15662 	}
15663 	else if (lhs_type.basetype == SPIRType::Struct)
15664 	{
15665 		chain.push_back(0);
15666 		uint32_t member_count = uint32_t(lhs_type.member_types.size());
15667 		for (uint32_t i = 0; i < member_count; i++)
15668 		{
15669 			chain.back() = i;
15670 			emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
15671 		}
15672 	}
15673 	else
15674 	{
15675 		// Need to handle unpack/packing fixups since this can differ wildly between the logical types,
15676 		// particularly in MSL.
15677 		// To deal with this, we emit access chains and go through emit_store_statement
15678 		// to deal with all the special cases we can encounter.
15679 
15680 		AccessChainMeta lhs_meta, rhs_meta;
15681 		auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
15682 		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
15683 		auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
15684 		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);
15685 
15686 		uint32_t id = ir.increase_bound_by(2);
15687 		lhs_id = id;
15688 		rhs_id = id + 1;
15689 
15690 		{
15691 			auto &lhs_expr = set<SPIRExpression>(lhs_id, move(lhs), lhs_type_id, true);
15692 			lhs_expr.need_transpose = lhs_meta.need_transpose;
15693 
15694 			if (lhs_meta.storage_is_packed)
15695 				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
15696 			if (lhs_meta.storage_physical_type != 0)
15697 				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);
15698 
15699 			forwarded_temporaries.insert(lhs_id);
15700 			suppressed_usage_tracking.insert(lhs_id);
15701 		}
15702 
15703 		{
15704 			auto &rhs_expr = set<SPIRExpression>(rhs_id, move(rhs), rhs_type_id, true);
15705 			rhs_expr.need_transpose = rhs_meta.need_transpose;
15706 
15707 			if (rhs_meta.storage_is_packed)
15708 				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
15709 			if (rhs_meta.storage_physical_type != 0)
15710 				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);
15711 
15712 			forwarded_temporaries.insert(rhs_id);
15713 			suppressed_usage_tracking.insert(rhs_id);
15714 		}
15715 
15716 		emit_store_statement(lhs_id, rhs_id);
15717 	}
15718 }
15719 
15720 bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
15721 {
15722 	if (!has_decoration(id, DecorationInputAttachmentIndex))
15723 		return false;
15724 
15725 	uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
15726 	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
15727 		if (remap.first == input_attachment_index)
15728 			return true;
15729 
15730 	return false;
15731 }
15732 
15733 const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
15734 {
15735 	const SPIRVariable *ret = nullptr;
15736 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
15737 		if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
15738 		    get_decoration(var.self, DecorationInputAttachmentIndex) == index)
15739 		{
15740 			ret = &var;
15741 		}
15742 	});
15743 	return ret;
15744 }
15745 
15746 const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
15747 {
15748 	const SPIRVariable *ret = nullptr;
15749 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
15750 		if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
15751 			ret = &var;
15752 	});
15753 	return ret;
15754 }
15755 
15756 void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
15757 {
15758 	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
15759 	{
15760 		auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
15761 		auto *output_var = find_color_output_by_location(remap.second);
15762 		if (!subpass_var)
15763 			continue;
15764 		if (!output_var)
15765 			SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
15766 			                  "to read from it.");
15767 		if (is_array(get<SPIRType>(output_var->basetype)))
15768 			SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");
15769 
15770 		auto &func = get<SPIRFunction>(get_entry_point().self);
15771 		func.fixup_hooks_in.push_back([=]() {
15772 			if (is_legacy())
15773 			{
15774 				statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
15775 				          get_decoration(output_var->self, DecorationLocation), "];");
15776 			}
15777 			else
15778 			{
15779 				uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
15780 				statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
15781 				          to_expression(output_var->self), ";");
15782 			}
15783 		});
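		// Illustrative only (hypothetical variable names): the fixup emitted above reads back the color
		// output at the start of main(), e.g. "subpassInput0 = gl_LastFragData[0];" on legacy targets, or
		// "subpassInput0.xyz = fragColor0;" when the color output is a vec3 on newer targets.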
15784 	}
15785 }
15786 
15787 bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
15788 {
15789 	return image_is_comparison(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
15790 }
15791 
15792 const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
15793 {
15794 	static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
15795 		                                                "GL_KHR_shader_subgroup_basic",
15796 		                                                "GL_KHR_shader_subgroup_vote",
15797 		                                                "GL_NV_gpu_shader_5",
15798 		                                                "GL_NV_shader_thread_group",
15799 		                                                "GL_NV_shader_thread_shuffle",
15800 		                                                "GL_ARB_shader_ballot",
15801 		                                                "GL_ARB_shader_group_vote",
15802 		                                                "GL_AMD_gcn_shader" };
15803 	return retval[c];
15804 }
15805 
15806 SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
15807 {
15808 	switch (c)
15809 	{
15810 	case ARB_shader_ballot:
15811 		return { "GL_ARB_shader_int64" };
15812 	case AMD_gcn_shader:
15813 		return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
15814 	default:
15815 		return {};
15816 	}
15817 }
15818 
15819 const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
15820 {
15821 	switch (c)
15822 	{
15823 	case ARB_shader_ballot:
15824 		return "defined(GL_ARB_shader_int64)";
15825 	case AMD_gcn_shader:
15826 		return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
15827 	default:
15828 		return "";
15829 	}
15830 }
15831 
15832 CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
15833     get_feature_dependencies(Feature feature)
15834 {
15835 	switch (feature)
15836 	{
15837 	case SubgroupAllEqualT:
15838 		return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
15839 	case SubgroupElect:
15840 		return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
15841 	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
15842 		return { SubgroupMask };
15843 	case SubgroupBallotBitCount:
15844 		return { SubgroupBallot };
15845 	default:
15846 		return {};
15847 	}
15848 }
15849 
15850 CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
15851     get_feature_dependency_mask(Feature feature)
15852 {
15853 	return build_mask(get_feature_dependencies(feature));
15854 }
15855 
15856 bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
15857 {
15858 	static const bool retval[FeatureCount] = { false, false, false, false, false, false,
15859 		                                       true, // SubgroupBallotFindLSB_MSB
15860 		                                       false, false, false, false,
15861 		                                       true, // SubgroupMemBarrier - replaced with workgroup memory barriers
15862 		                                       false, false, true,  false };
15863 
15864 	return retval[feature];
15865 }
15866 
15867 CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
15868     get_KHR_extension_for_feature(Feature feature)
15869 {
15870 	static const Candidate extensions[FeatureCount] = {
15871 		KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic,  KHR_shader_subgroup_basic,  KHR_shader_subgroup_basic,
15872 		KHR_shader_subgroup_basic,  KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
15873 		KHR_shader_subgroup_vote,   KHR_shader_subgroup_basic,  KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
15874 		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
15875 	};
15876 
15877 	return extensions[feature];
15878 }
15879 
15880 void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
15881 {
15882 	feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
15883 }
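// For example, request_feature(SubgroupElect) also sets the mask bits for SubgroupBallotFindLSB_MSB,
// SubgroupBallot and SubgroupInvocationID via get_feature_dependency_mask().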
15884 
15885 bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
15886 {
15887 	return (feature_mask & (1u << feature)) != 0;
15888 }
15889 
15890 CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
15891 {
15892 	Result res;
15893 
15894 	for (uint32_t i = 0u; i < FeatureCount; ++i)
15895 	{
15896 		if (feature_mask & (1u << i))
15897 		{
15898 			auto feature = static_cast<Feature>(i);
15899 			std::unordered_set<uint32_t> unique_candidates;
15900 
15901 			auto candidates = get_candidates_for_feature(feature);
15902 			unique_candidates.insert(candidates.begin(), candidates.end());
15903 
15904 			auto deps = get_feature_dependencies(feature);
15905 			for (Feature d : deps)
15906 			{
15907 				candidates = get_candidates_for_feature(d);
15908 				if (!candidates.empty())
15909 					unique_candidates.insert(candidates.begin(), candidates.end());
15910 			}
15911 
15912 			for (uint32_t c : unique_candidates)
15913 				++res.weights[static_cast<Candidate>(c)];
15914 		}
15915 	}
15916 
15917 	return res;
15918 }
15919 
15920 CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
15921     get_candidates_for_feature(Feature ft, const Result &r)
15922 {
15923 	auto c = get_candidates_for_feature(ft);
15924 	auto cmp = [&r](Candidate a, Candidate b) {
15925 		if (r.weights[a] == r.weights[b])
15926 			return a < b; // Prefer candidates with lower enum value
15927 		return r.weights[a] > r.weights[b];
15928 	};
15929 	std::sort(c.begin(), c.end(), cmp);
15930 	return c;
15931 }
15932 
15933 CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
15934     get_candidates_for_feature(Feature feature)
15935 {
15936 	switch (feature)
15937 	{
15938 	case SubgroupMask:
15939 		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
15940 	case SubgroupSize:
15941 		return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
15942 	case SubgroupInvocationID:
15943 		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
15944 	case SubgroupID:
15945 		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
15946 	case NumSubgroups:
15947 		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
15948 	case SubgroupBroadcast_First:
15949 		return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
15950 	case SubgroupBallotFindLSB_MSB:
15951 		return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
15952 	case SubgroupAll_Any_AllEqualBool:
15953 		return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
15954 	case SubgroupAllEqualT:
15955 		return {}; // depends on other features only
15956 	case SubgroupElect:
15957 		return {}; // depends on other features only
15958 	case SubgroupBallot:
15959 		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
15960 	case SubgroupBarrier:
15961 		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
15962 	case SubgroupMemBarrier:
15963 		return { KHR_shader_subgroup_basic };
15964 	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
15965 		return {};
15966 	case SubgroupBallotBitExtract:
15967 		return { NV_shader_thread_group };
15968 	case SubgroupBallotBitCount:
15969 		return {};
15970 	default:
15971 		return {};
15972 	}
15973 }
15974 
15975 CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
15976     const SmallVector<Feature> &features)
15977 {
15978 	FeatureMask mask = 0;
15979 	for (Feature f : features)
15980 		mask |= FeatureMask(1) << f;
15981 	return mask;
15982 }
15983 
15984 CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
15985 {
15986 	for (auto &weight : weights)
15987 		weight = 0;
15988 
15989 	// Make sure KHR_shader_subgroup extensions are always preferred.
15990 	const uint32_t big_num = FeatureCount;
15991 	weights[KHR_shader_subgroup_ballot] = big_num;
15992 	weights[KHR_shader_subgroup_basic] = big_num;
15993 	weights[KHR_shader_subgroup_vote] = big_num;
15994 }
15995 
15996 void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
15997 {
15998 	// Must be ordered to maintain deterministic output, so vector is appropriate.
15999 	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
16000 	    end(workaround_ubo_load_overload_types))
16001 	{
16002 		force_recompile();
16003 		workaround_ubo_load_overload_types.push_back(id);
16004 	}
16005 }
16006 
16007 void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
16008 {
16009 	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
16010 	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
16011 	// ensure row_major decoration is actually respected.
16012 	auto *var = maybe_get_backing_variable(ptr);
16013 	if (!var)
16014 		return;
16015 
16016 	auto &backing_type = get<SPIRType>(var->basetype);
16017 	bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
16018 	              has_decoration(backing_type.self, DecorationBlock);
16019 	if (!is_ubo)
16020 		return;
16021 
16022 	auto *type = &get<SPIRType>(loaded_type);
16023 	bool rewrite = false;
16024 
16025 	if (is_matrix(*type))
16026 	{
16027 		// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
16028 		// we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
16029 		// If there is any row-major action going on, we apply the workaround.
16030 		// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
16031 		// If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need the workaround.
16032 		type = &backing_type;
16033 	}
16034 
16035 	if (type->basetype == SPIRType::Struct)
16036 	{
16037 		// If we're loading a struct where any member is a row-major matrix, apply the workaround.
16038 		for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
16039 		{
16040 			if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
16041 			{
16042 				rewrite = true;
16043 				break;
16044 			}
16045 		}
16046 	}
16047 
16048 	if (rewrite)
16049 	{
16050 		request_workaround_wrapper_overload(loaded_type);
16051 		expr = join("spvWorkaroundRowMajor(", expr, ")");
16052 	}
16053 }
16054 
16055 void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
16056 {
16057 	masked_output_locations.insert({ location, component });
16058 }
16059 
16060 void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
16061 {
16062 	masked_output_builtins.insert(builtin);
16063 }
16064 
16065 bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
16066 {
16067 	auto &type = get<SPIRType>(var.basetype);
16068 	bool is_block = has_decoration(type.self, DecorationBlock);
16069 	// Blocks by themselves are never masked. Must be masked per-member.
16070 	if (is_block)
16071 		return false;
16072 
16073 	bool is_builtin = has_decoration(var.self, DecorationBuiltIn);
16074 
16075 	if (is_builtin)
16076 	{
16077 		return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn)));
16078 	}
16079 	else
16080 	{
16081 		if (!has_decoration(var.self, DecorationLocation))
16082 			return false;
16083 
16084 		return is_stage_output_location_masked(
16085 				get_decoration(var.self, DecorationLocation),
16086 				get_decoration(var.self, DecorationComponent));
16087 	}
16088 }
16089 
16090 bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
16091 {
16092 	auto &type = get<SPIRType>(var.basetype);
16093 	bool is_block = has_decoration(type.self, DecorationBlock);
16094 	if (!is_block)
16095 		return false;
16096 
16097 	BuiltIn builtin = BuiltInMax;
16098 	if (is_member_builtin(type, index, &builtin))
16099 	{
16100 		return is_stage_output_builtin_masked(builtin);
16101 	}
16102 	else
16103 	{
16104 		uint32_t location = get_declared_member_location(var, index, strip_array);
16105 		uint32_t component = get_member_decoration(type.self, index, DecorationComponent);
16106 		return is_stage_output_location_masked(location, component);
16107 	}
16108 }
16109 
16110 bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
16111 {
16112 	return masked_output_locations.count({ location, component }) != 0;
16113 }
16114 
is_stage_output_builtin_masked(spv::BuiltIn builtin) const16115 bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const
16116 {
16117 	return masked_output_builtins.count(builtin) != 0;
16118 }
16119 
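// Resolves the location of a block member: an explicit Location decoration on the member wins,
// otherwise the location is accumulated from the block's base location.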
uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
	auto &block_type = get<SPIRType>(var.basetype);
	if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation))
		return get_member_decoration(block_type.self, mbr_idx, DecorationLocation);
	else
		return get_accumulated_member_location(var, mbr_idx, strip_array);
}

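// Walks the members preceding mbr_idx and adds up how many locations each one consumes.
// For example (hypothetical layout), a block decorated with Location = 2 whose first two
// members are a vec4 and a mat4 would place member index 2 at location 2 + 1 + 4 = 7.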
uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
	auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
	uint32_t location = get_decoration(var.self, DecorationLocation);

	for (uint32_t i = 0; i < mbr_idx; i++)
	{
		auto &mbr_type = get<SPIRType>(type.member_types[i]);

		// Start counting from any place we have a new location decoration.
		if (has_member_decoration(type.self, i, DecorationLocation))
			location = get_member_decoration(type.self, i, DecorationLocation);

		uint32_t location_count = type_to_location_count(mbr_type);
		location += location_count;
	}

	return location;
}

StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
{
	auto *var = maybe_get_backing_variable(ptr);

	// If the expression has been lowered to a temporary, we need to use the Generic storage class.
	// We're looking for the effective storage class of a given expression.
	// An access chain or forwarded OpLoads from such access chains
	// will generally have the storage class of the underlying variable, but if the load was not forwarded
	// we have lost any address space qualifiers.
	bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
	                        (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);

	if (var && !forced_temporary)
	{
		if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
			return StorageClassWorkgroup;
		if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
			return StorageClassStorageBuffer;

		// Normalize SSBOs to StorageBuffer here.
		if (var->storage == StorageClassUniform &&
		    has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
			return StorageClassStorageBuffer;
		else
			return var->storage;
	}
	else
		return expression_type(ptr).storage;
}

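// Computes how many locations a type consumes as counted by this compiler:
// structs sum their members, matrices consume one location per column, other
// vectors and scalars consume one, and every array dimension multiplies the result.
// For example (hypothetical type), an array of 4 mat3 consumes 3 * 4 = 12 locations.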
uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
{
	uint32_t count;
	if (type.basetype == SPIRType::Struct)
	{
		uint32_t mbr_count = uint32_t(type.member_types.size());
		count = 0;
		for (uint32_t i = 0; i < mbr_count; i++)
			count += type_to_location_count(get<SPIRType>(type.member_types[i]));
	}
	else
	{
		count = type.columns > 1 ? type.columns : 1;
	}

	uint32_t dim_count = uint32_t(type.array.size());
	for (uint32_t i = 0; i < dim_count; i++)
		count *= to_array_size_literal(type, i);

	return count;
}