/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_cross.hpp"
#include "GLSL.std.450.h"
#include "spirv_cfg.hpp"
#include "spirv_common.hpp"
#include "spirv_parser.hpp"
#include <algorithm>
#include <cstring>
#include <utility>

using namespace std;
using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
Compiler::Compiler(vector<uint32_t> ir_)
{
    Parser parser(move(ir_));
    parser.parse();
    set_ir(move(parser.get_parsed_ir()));
}

Compiler::Compiler(const uint32_t *ir_, size_t word_count)
{
    Parser parser(ir_, word_count);
    parser.parse();
    set_ir(move(parser.get_parsed_ir()));
}

Compiler::Compiler(const ParsedIR &ir_)
{
    set_ir(ir_);
}

Compiler::Compiler(ParsedIR &&ir_)
{
    set_ir(move(ir_));
}

void Compiler::set_ir(ParsedIR &&ir_)
{
    ir = move(ir_);
    parse_fixup();
}

void Compiler::set_ir(const ParsedIR &ir_)
{
    ir = ir_;
    parse_fixup();
}

string Compiler::compile()
{
    return "";
}
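
// compile() is a stub at this level; concrete backends (e.g. CompilerGLSL in
// spirv_glsl.hpp) override it to emit source in their target language.
// A minimal usage sketch (assuming a std::vector<uint32_t> of SPIR-V words
// named spirv_words; the name is illustrative only):
//
//     spirv_cross::CompilerGLSL glsl(std::move(spirv_words));
//     std::string source = glsl.compile();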

bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
{
    auto &type = get<SPIRType>(v.basetype);
    bool ssbo = v.storage == StorageClassStorageBuffer ||
                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
    bool image = type.basetype == SPIRType::Image;
    bool counter = type.basetype == SPIRType::AtomicCounter;
    bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT;

    bool is_restrict;
    if (ssbo)
        is_restrict = ir.get_buffer_block_flags(v).get(DecorationRestrict);
    else
        is_restrict = has_decoration(v.self, DecorationRestrict);

    return !is_restrict && (ssbo || image || counter || buffer_reference);
}

bool Compiler::block_is_pure(const SPIRBlock &block)
{
    // This is a global side effect of the function.
    if (block.terminator == SPIRBlock::Kill ||
        block.terminator == SPIRBlock::TerminateRay ||
        block.terminator == SPIRBlock::IgnoreIntersection)
        return false;

    for (auto &i : block.ops)
    {
        auto ops = stream(i);
        auto op = static_cast<Op>(i.op);

        switch (op)
        {
        case OpFunctionCall:
        {
            uint32_t func = ops[2];
            if (!function_is_pure(get<SPIRFunction>(func)))
                return false;
            break;
        }

        case OpCopyMemory:
        case OpStore:
        {
            auto &type = expression_type(ops[0]);
            if (type.storage != StorageClassFunction)
                return false;
            break;
        }

        case OpImageWrite:
            return false;

        // Atomics are impure.
        case OpAtomicLoad:
        case OpAtomicStore:
        case OpAtomicExchange:
        case OpAtomicCompareExchange:
        case OpAtomicCompareExchangeWeak:
        case OpAtomicIIncrement:
        case OpAtomicIDecrement:
        case OpAtomicIAdd:
        case OpAtomicISub:
        case OpAtomicSMin:
        case OpAtomicUMin:
        case OpAtomicSMax:
        case OpAtomicUMax:
        case OpAtomicAnd:
        case OpAtomicOr:
        case OpAtomicXor:
            return false;

        // Geometry shader builtins modify global state.
        case OpEndPrimitive:
        case OpEmitStreamVertex:
        case OpEndStreamPrimitive:
        case OpEmitVertex:
            return false;

        // Barriers disallow any reordering, so we should treat blocks with a barrier as writing.
        case OpControlBarrier:
        case OpMemoryBarrier:
            return false;

        // Ray tracing builtins are impure.
        case OpReportIntersectionKHR:
        case OpIgnoreIntersectionNV:
        case OpTerminateRayNV:
        case OpTraceNV:
        case OpTraceRayKHR:
        case OpExecuteCallableNV:
        case OpExecuteCallableKHR:
        case OpRayQueryInitializeKHR:
        case OpRayQueryTerminateKHR:
        case OpRayQueryGenerateIntersectionKHR:
        case OpRayQueryConfirmIntersectionKHR:
        case OpRayQueryProceedKHR:
            // There are various getters in ray query, but they are considered pure.
            return false;

        // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.

        case OpDemoteToHelperInvocationEXT:
            // This is a global side effect of the function.
            return false;

        default:
            break;
        }
    }

    return true;
}

string Compiler::to_name(uint32_t id, bool allow_alias) const
{
    if (allow_alias && ir.ids[id].get_type() == TypeType)
    {
        // If this type is a simple alias, emit the
        // name of the original type instead.
        // We don't want to override the meta alias
        // as that can be overridden by the reflection APIs after parse.
        auto &type = get<SPIRType>(id);
        if (type.type_alias)
        {
            // If the alias master has been specially packed, we will have emitted a clean variant as well,
            // so skip the name aliasing here.
            if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
                return to_name(type.type_alias);
        }
    }

    auto &alias = ir.get_name(id);
    if (alias.empty())
        return join("_", id);
    else
        return alias;
}
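
// Fallback naming illustration (a sketch, not part of this translation unit):
// an ID without an OpName debug name gets a synthesized "_<id>" name.
//
//     // comp.to_name(42) == "_42"  (no OpName present for ID 42)
//     // comp.to_name(42) == "foo"  (OpName "foo" was present)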

bool Compiler::function_is_pure(const SPIRFunction &func)
{
    for (auto block : func.blocks)
    {
        if (!block_is_pure(get<SPIRBlock>(block)))
        {
            //fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str());
            return false;
        }
    }

    //fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str());
    return true;
}

void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id)
{
    for (auto &i : block.ops)
    {
        auto ops = stream(i);
        auto op = static_cast<Op>(i.op);

        switch (op)
        {
        case OpFunctionCall:
        {
            uint32_t func = ops[2];
            register_global_read_dependencies(get<SPIRFunction>(func), id);
            break;
        }

        case OpLoad:
        case OpImageRead:
        {
            // If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
            auto *var = maybe_get_backing_variable(ops[2]);
            if (var && var->storage != StorageClassFunction)
            {
                auto &type = get<SPIRType>(var->basetype);

                // Input attachments (SubpassData) are immutable.
                if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData)
                    var->dependees.push_back(id);
            }
            break;
        }

        default:
            break;
        }
    }
}

void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id)
{
    for (auto block : func.blocks)
        register_global_read_dependencies(get<SPIRBlock>(block), id);
}

SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
{
    auto *var = maybe_get<SPIRVariable>(chain);
    if (!var)
    {
        auto *cexpr = maybe_get<SPIRExpression>(chain);
        if (cexpr)
            var = maybe_get<SPIRVariable>(cexpr->loaded_from);

        auto *access_chain = maybe_get<SPIRAccessChain>(chain);
        if (access_chain)
            var = maybe_get<SPIRVariable>(access_chain->loaded_from);
    }

    return var;
}
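
// Illustration (a sketch): for a chain such as
//
//     %ptr = OpAccessChain %ptr_type %var %index
//     %val = OpLoad %type %ptr
//
// the SPIRExpression emitted for %ptr records %var in loaded_from, so
// maybe_get_backing_variable(%ptr) resolves back to the SPIRVariable for %var.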

void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
{
    auto &e = get<SPIRExpression>(expr);
    auto *var = maybe_get_backing_variable(chain);

    if (var)
    {
        e.loaded_from = var->self;

        // If the backing variable is immutable, we do not need to depend on the variable.
        if (forwarded && !is_immutable(var->self))
            var->dependees.push_back(e.self);

        // If we load from a parameter, make sure we create "inout" if we also write to the parameter.
        // The default is "in" however, so we never invalidate our compilation by reading.
        if (var && var->parameter)
            var->parameter->read_count++;
    }
}

void Compiler::register_write(uint32_t chain)
{
    auto *var = maybe_get<SPIRVariable>(chain);
    if (!var)
    {
        // If we're storing through an access chain, invalidate the backing variable instead.
        auto *expr = maybe_get<SPIRExpression>(chain);
        if (expr && expr->loaded_from)
            var = maybe_get<SPIRVariable>(expr->loaded_from);

        auto *access_chain = maybe_get<SPIRAccessChain>(chain);
        if (access_chain && access_chain->loaded_from)
            var = maybe_get<SPIRVariable>(access_chain->loaded_from);
    }

    auto &chain_type = expression_type(chain);

    if (var)
    {
        bool check_argument_storage_qualifier = true;
        auto &type = expression_type(chain);

        // If our variable is in a storage class which can alias with other buffers,
        // invalidate all variables which depend on aliased variables. And if this is a
        // variable pointer, then invalidate all variables regardless.
        if (get_variable_data_type(*var).pointer)
        {
            flush_all_active_variables();

            if (type.pointer_depth == 1)
            {
                // We have a backing variable which is a pointer-to-pointer type.
                // We are storing some data through a pointer acquired through that variable,
                // but we are not writing to the value of the variable itself,
                // i.e., we are not modifying the pointer directly.
                // If we are storing a non-pointer type (pointer_depth == 1),
                // we know that we are storing some unrelated data.
                // A case here would be
                // void foo(Foo * const *arg) {
                //     Foo *bar = *arg;
                //     bar->unrelated = 42;
                // }
                // arg, the argument, is constant.
                check_argument_storage_qualifier = false;
            }
        }

        if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var))
            flush_all_aliased_variables();
        else if (var)
            flush_dependees(*var);

        // We tried to write to a parameter which is not marked with an out qualifier; force a recompile.
        if (check_argument_storage_qualifier && var->parameter && var->parameter->write_count == 0)
        {
            var->parameter->write_count++;
            force_recompile();
        }
    }
    else if (chain_type.pointer)
    {
        // If we stored through a variable pointer, then we don't know which
        // variable we stored to. So *all* expressions after this point need to
        // be invalidated.
        // FIXME: If we can prove that the variable pointer will point to
        // only certain variables, we can invalidate only those.
        flush_all_active_variables();
    }

    // If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead.
    // This can happen in copy_logical_type where we unroll complex reads and writes to temporaries.
}

void Compiler::flush_dependees(SPIRVariable &var)
{
    for (auto expr : var.dependees)
        invalid_expressions.insert(expr);
    var.dependees.clear();
}

void Compiler::flush_all_aliased_variables()
{
    for (auto aliased : aliased_variables)
        flush_dependees(get<SPIRVariable>(aliased));
}

void Compiler::flush_all_atomic_capable_variables()
{
    for (auto global : global_variables)
        flush_dependees(get<SPIRVariable>(global));
    flush_all_aliased_variables();
}

void Compiler::flush_control_dependent_expressions(uint32_t block_id)
{
    auto &block = get<SPIRBlock>(block_id);
    for (auto &expr : block.invalidate_expressions)
        invalid_expressions.insert(expr);
    block.invalidate_expressions.clear();
}

void Compiler::flush_all_active_variables()
{
    // Invalidate all temporaries we read from variables in this block since they were forwarded.
    // Invalidate all temporaries we read from globals.
    for (auto &v : current_function->local_variables)
        flush_dependees(get<SPIRVariable>(v));
    for (auto &arg : current_function->arguments)
        flush_dependees(get<SPIRVariable>(arg.id));
    for (auto global : global_variables)
        flush_dependees(get<SPIRVariable>(global));

    flush_all_aliased_variables();
}

uint32_t Compiler::expression_type_id(uint32_t id) const
{
    switch (ir.ids[id].get_type())
    {
    case TypeVariable:
        return get<SPIRVariable>(id).basetype;

    case TypeExpression:
        return get<SPIRExpression>(id).expression_type;

    case TypeConstant:
        return get<SPIRConstant>(id).constant_type;

    case TypeConstantOp:
        return get<SPIRConstantOp>(id).basetype;

    case TypeUndef:
        return get<SPIRUndef>(id).basetype;

    case TypeCombinedImageSampler:
        return get<SPIRCombinedImageSampler>(id).combined_type;

    case TypeAccessChain:
        return get<SPIRAccessChain>(id).basetype;

    default:
        SPIRV_CROSS_THROW("Cannot resolve expression type.");
    }
}

const SPIRType &Compiler::expression_type(uint32_t id) const
{
    return get<SPIRType>(expression_type_id(id));
}

bool Compiler::expression_is_lvalue(uint32_t id) const
{
    auto &type = expression_type(id);
    switch (type.basetype)
    {
    case SPIRType::SampledImage:
    case SPIRType::Image:
    case SPIRType::Sampler:
        return false;

    default:
        return true;
    }
}

bool Compiler::is_immutable(uint32_t id) const
{
    if (ir.ids[id].get_type() == TypeVariable)
    {
        auto &var = get<SPIRVariable>(id);

        // Anything we load from the UniformConstant address space is guaranteed to be immutable.
        bool pointer_to_const = var.storage == StorageClassUniformConstant;
        return pointer_to_const || var.phi_variable || !expression_is_lvalue(id);
    }
    else if (ir.ids[id].get_type() == TypeAccessChain)
        return get<SPIRAccessChain>(id).immutable;
    else if (ir.ids[id].get_type() == TypeExpression)
        return get<SPIRExpression>(id).immutable;
    else if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp ||
             ir.ids[id].get_type() == TypeUndef)
        return true;
    else
        return false;
}

static inline bool storage_class_is_interface(spv::StorageClass storage)
{
    switch (storage)
    {
    case StorageClassInput:
    case StorageClassOutput:
    case StorageClassUniform:
    case StorageClassUniformConstant:
    case StorageClassAtomicCounter:
    case StorageClassPushConstant:
    case StorageClassStorageBuffer:
        return true;

    default:
        return false;
    }
}

bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const
{
    if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable)
        return true;

    // Combined image samplers are always considered active as they are "magic" variables.
    if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) {
            return samp.combined_id == var.self;
        }) != end(combined_image_samplers))
    {
        return false;
    }

    // In SPIR-V 1.4 and up we must also use the active variable interface to disable global variables
    // which are not part of the entry point.
    if (ir.get_spirv_version() >= 0x10400 && var.storage != spv::StorageClassGeneric &&
        var.storage != spv::StorageClassFunction && !interface_variable_exists_in_entry_point(var.self))
    {
        return true;
    }

    return check_active_interface_variables && storage_class_is_interface(var.storage) &&
           active_interface_variables.find(var.self) == end(active_interface_variables);
}

bool Compiler::is_builtin_type(const SPIRType &type) const
{
    auto *type_meta = ir.find_meta(type.self);

    // We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin.
    if (type_meta)
        for (auto &m : type_meta->members)
            if (m.builtin)
                return true;

    return false;
}

bool Compiler::is_builtin_variable(const SPIRVariable &var) const
{
    auto *m = ir.find_meta(var.self);

    if (var.compat_builtin || (m && m->decoration.builtin))
        return true;
    else
        return is_builtin_type(get<SPIRType>(var.basetype));
}

bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const
{
    auto *type_meta = ir.find_meta(type.self);

    if (type_meta)
    {
        auto &memb = type_meta->members;
        if (index < memb.size() && memb[index].builtin)
        {
            if (builtin)
                *builtin = memb[index].builtin_type;
            return true;
        }
    }

    return false;
}

bool Compiler::is_scalar(const SPIRType &type) const
{
    return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1;
}

bool Compiler::is_vector(const SPIRType &type) const
{
    return type.vecsize > 1 && type.columns == 1;
}

bool Compiler::is_matrix(const SPIRType &type) const
{
    return type.vecsize > 1 && type.columns > 1;
}

bool Compiler::is_array(const SPIRType &type) const
{
    return !type.array.empty();
}

ShaderResources Compiler::get_shader_resources() const
{
    return get_shader_resources(nullptr);
}

ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> &active_variables) const
{
    return get_shader_resources(&active_variables);
}

bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
    uint32_t variable = 0;
    switch (opcode)
    {
    // Need this first; otherwise GCC complains about unhandled switch statements.
    default:
        break;

    case OpFunctionCall:
    {
        // Invalid SPIR-V.
        if (length < 3)
            return false;

        uint32_t count = length - 3;
        args += 3;
        for (uint32_t i = 0; i < count; i++)
        {
            auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
            if (var && storage_class_is_interface(var->storage))
                variables.insert(args[i]);
        }
        break;
    }

    case OpSelect:
    {
        // Invalid SPIR-V.
        if (length < 5)
            return false;

        uint32_t count = length - 3;
        args += 3;
        for (uint32_t i = 0; i < count; i++)
        {
            auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
            if (var && storage_class_is_interface(var->storage))
                variables.insert(args[i]);
        }
        break;
    }

    case OpPhi:
    {
        // Invalid SPIR-V.
        if (length < 2)
            return false;

        uint32_t count = length - 2;
        args += 2;
        for (uint32_t i = 0; i < count; i += 2)
        {
            auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
            if (var && storage_class_is_interface(var->storage))
                variables.insert(args[i]);
        }
        break;
    }

    case OpAtomicStore:
    case OpStore:
        // Invalid SPIR-V.
        if (length < 1)
            return false;
        variable = args[0];
        break;

    case OpCopyMemory:
    {
        if (length < 2)
            return false;

        auto *var = compiler.maybe_get<SPIRVariable>(args[0]);
        if (var && storage_class_is_interface(var->storage))
            variables.insert(args[0]);

        var = compiler.maybe_get<SPIRVariable>(args[1]);
        if (var && storage_class_is_interface(var->storage))
            variables.insert(args[1]);
        break;
    }

    case OpExtInst:
    {
        if (length < 5)
            return false;
        auto &extension_set = compiler.get<SPIRExtension>(args[2]);
        switch (extension_set.ext)
        {
        case SPIRExtension::GLSL:
        {
            auto op = static_cast<GLSLstd450>(args[3]);

            switch (op)
            {
            case GLSLstd450InterpolateAtCentroid:
            case GLSLstd450InterpolateAtSample:
            case GLSLstd450InterpolateAtOffset:
            {
                auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
                if (var && storage_class_is_interface(var->storage))
                    variables.insert(args[4]);
                break;
            }

            default:
                break;
            }
            break;
        }
        case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter:
        {
            enum AMDShaderExplicitVertexParameter
            {
                InterpolateAtVertexAMD = 1
            };

            auto op = static_cast<AMDShaderExplicitVertexParameter>(args[3]);

            switch (op)
            {
            case InterpolateAtVertexAMD:
            {
                auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
                if (var && storage_class_is_interface(var->storage))
                    variables.insert(args[4]);
                break;
            }

            default:
                break;
            }
            break;
        }
        default:
            break;
        }
        break;
    }

    case OpAccessChain:
    case OpInBoundsAccessChain:
    case OpPtrAccessChain:
    case OpLoad:
    case OpCopyObject:
    case OpImageTexelPointer:
    case OpAtomicLoad:
    case OpAtomicExchange:
    case OpAtomicCompareExchange:
    case OpAtomicCompareExchangeWeak:
    case OpAtomicIIncrement:
    case OpAtomicIDecrement:
    case OpAtomicIAdd:
    case OpAtomicISub:
    case OpAtomicSMin:
    case OpAtomicUMin:
    case OpAtomicSMax:
    case OpAtomicUMax:
    case OpAtomicAnd:
    case OpAtomicOr:
    case OpAtomicXor:
    case OpArrayLength:
        // Invalid SPIR-V.
        if (length < 3)
            return false;
        variable = args[2];
        break;
    }

    if (variable)
    {
        auto *var = compiler.maybe_get<SPIRVariable>(variable);
        if (var && storage_class_is_interface(var->storage))
            variables.insert(variable);
    }
    return true;
}

unordered_set<VariableID> Compiler::get_active_interface_variables() const
{
    // Traverse the call graph and find all interface variables which are in use.
    unordered_set<VariableID> variables;
    InterfaceVariableAccessHandler handler(*this, variables);
    traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
        if (var.storage != StorageClassOutput)
            return;
        if (!interface_variable_exists_in_entry_point(var.self))
            return;

        // An output variable which is just declared (but uninitialized) might be read by subsequent stages
        // so we should force-enable these outputs,
        // since compilation will fail if a subsequent stage attempts to read from the variable in question.
        // Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
        if (var.initializer != ID(0) || get_execution_model() != ExecutionModelFragment)
            variables.insert(var.self);
    });

    // If we needed to create one, we'll need it.
    if (dummy_sampler_id)
        variables.insert(dummy_sampler_id);

    return variables;
}

void Compiler::set_enabled_interface_variables(std::unordered_set<VariableID> active_variables)
{
    active_interface_variables = move(active_variables);
    check_active_interface_variables = true;
}
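
// Typical usage of the two functions above (a sketch of the documented
// reflection flow, assuming `comp` is a constructed Compiler):
//
//     auto active = comp.get_active_interface_variables();
//     spirv_cross::ShaderResources res = comp.get_shader_resources(active);
//     comp.set_enabled_interface_variables(std::move(active));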

ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *active_variables) const
{
    ShaderResources res;

    bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant();

    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
        auto &type = this->get<SPIRType>(var.basetype);

        // It is possible for uniform storage classes to be passed as function parameters, so detect
        // that. To detect function parameters, check if the StorageClass of the variable is function scope.
        if (var.storage == StorageClassFunction || !type.pointer)
            return;

        if (active_variables && active_variables->find(var.self) == end(*active_variables))
            return;

        // In SPIR-V 1.4 and up, every global must be present in the entry point interface list,
        // not just IO variables.
        bool active_in_entry_point = true;
        if (ir.get_spirv_version() < 0x10400)
        {
            if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
                active_in_entry_point = interface_variable_exists_in_entry_point(var.self);
        }
        else
            active_in_entry_point = interface_variable_exists_in_entry_point(var.self);

        if (!active_in_entry_point)
            return;

        bool is_builtin = is_builtin_variable(var);

        if (is_builtin)
        {
            if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
                return;

            auto &list = var.storage == StorageClassInput ? res.builtin_inputs : res.builtin_outputs;
            BuiltInResource resource;

            if (has_decoration(type.self, DecorationBlock))
            {
                resource.resource = { var.self, var.basetype, type.self,
                                      get_remapped_declared_block_name(var.self, false) };

                for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
                {
                    resource.value_type_id = type.member_types[i];
                    resource.builtin = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn));
                    list.push_back(resource);
                }
            }
            else
            {
                bool strip_array =
                    !has_decoration(var.self, DecorationPatch) && (
                        get_execution_model() == ExecutionModelTessellationControl ||
                        (get_execution_model() == ExecutionModelTessellationEvaluation &&
                         var.storage == StorageClassInput));

                resource.resource = { var.self, var.basetype, type.self, get_name(var.self) };

                if (strip_array && !type.array.empty())
                    resource.value_type_id = get_variable_data_type(var).parent_type;
                else
                    resource.value_type_id = get_variable_data_type_id(var);

                assert(resource.value_type_id);

                resource.builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
                list.push_back(std::move(resource));
            }
            return;
        }

        // Stage inputs
        if (var.storage == StorageClassInput)
        {
            if (has_decoration(type.self, DecorationBlock))
            {
                res.stage_inputs.push_back(
                    { var.self, var.basetype, type.self,
                      get_remapped_declared_block_name(var.self, false) });
            }
            else
                res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
        }
        // Subpass inputs
        else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData)
        {
            res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
        }
        // Stage outputs
        else if (var.storage == StorageClassOutput)
        {
            if (has_decoration(type.self, DecorationBlock))
            {
                res.stage_outputs.push_back(
                    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
            }
            else
                res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
        }
        // UBOs
        else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock))
        {
            res.uniform_buffers.push_back(
                { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
        }
        // Old way to declare SSBOs.
        else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock))
        {
            res.storage_buffers.push_back(
                { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
        }
        // Modern way to declare SSBOs.
        else if (type.storage == StorageClassStorageBuffer)
        {
            res.storage_buffers.push_back(
                { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
        }
        // Push constant blocks
        else if (type.storage == StorageClassPushConstant)
        {
            // There can only be one push constant block, but keep the vector in case this restriction is lifted
            // in the future.
            res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
        }
        // Storage images (sampled == 2)
        else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image &&
                 type.image.sampled == 2)
        {
            res.storage_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
        }
        // Separate images (sampled == 1)
        else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image &&
                 type.image.sampled == 1)
        {
            res.separate_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
        }
        // Separate samplers
        else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Sampler)
        {
            res.separate_samplers.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
        }
        // Combined image samplers (textures)
        else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::SampledImage)
        {
            res.sampled_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
        }
        // Atomic counters
        else if (type.storage == StorageClassAtomicCounter)
        {
            res.atomic_counters.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
        }
        // Acceleration structures
        else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructure)
        {
            res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
        }
    });

    return res;
}
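
// Example reflection loop over the result (a sketch, assuming `comp` is a
// constructed Compiler; uses only the public API):
//
//     spirv_cross::ShaderResources res = comp.get_shader_resources();
//     for (auto &img : res.sampled_images)
//     {
//         uint32_t set = comp.get_decoration(img.id, spv::DecorationDescriptorSet);
//         uint32_t binding = comp.get_decoration(img.id, spv::DecorationBinding);
//         printf("Image %s: set = %u, binding = %u\n", img.name.c_str(), set, binding);
//     }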

bool Compiler::type_is_block_like(const SPIRType &type) const
{
    if (type.basetype != SPIRType::Struct)
        return false;

    if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
    {
        return true;
    }

    // Block-like types may have Offset decorations.
    for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
        if (has_member_decoration(type.self, i, DecorationOffset))
            return true;

    return false;
}

void Compiler::parse_fixup()
{
    // Figure out specialization constants for work group sizes.
    for (auto id_ : ir.ids_for_constant_or_variable)
    {
        auto &id = ir.ids[id_];

        if (id.get_type() == TypeConstant)
        {
            auto &c = id.get<SPIRConstant>();
            if (ir.meta[c.self].decoration.builtin && ir.meta[c.self].decoration.builtin_type == BuiltInWorkgroupSize)
            {
                // In current SPIR-V, there can be just one constant like this.
                // All entry points will receive the constant value.
                for (auto &entry : ir.entry_points)
                {
                    entry.second.workgroup_size.constant = c.self;
                    entry.second.workgroup_size.x = c.scalar(0, 0);
                    entry.second.workgroup_size.y = c.scalar(0, 1);
                    entry.second.workgroup_size.z = c.scalar(0, 2);
                }
            }
        }
        else if (id.get_type() == TypeVariable)
        {
            auto &var = id.get<SPIRVariable>();
            if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup ||
                var.storage == StorageClassOutput)
                global_variables.push_back(var.self);
            if (variable_storage_is_aliased(var))
                aliased_variables.push_back(var.self);
        }
    }
}
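
// The workgroup size fixup above backs the public query for compute shaders.
// A usage sketch (get_work_group_size_specialization_constants is part of the
// public API; `comp` is an assumed Compiler instance):
//
//     spirv_cross::SpecializationConstant x, y, z;
//     comp.get_work_group_size_specialization_constants(x, y, z);
//     // x.id is non-zero if the X dimension is controlled by a spec constant.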

void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary,
                                 string &name)
{
    if (name.empty())
        return;

    const auto find_name = [&](const string &n) -> bool {
        if (cache_primary.find(n) != end(cache_primary))
            return true;

        if (&cache_primary != &cache_secondary)
            if (cache_secondary.find(n) != end(cache_secondary))
                return true;

        return false;
    };

    const auto insert_name = [&](const string &n) { cache_primary.insert(n); };

    if (!find_name(name))
    {
        insert_name(name);
        return;
    }

    uint32_t counter = 0;
    auto tmpname = name;

    bool use_linked_underscore = true;

    if (tmpname == "_")
    {
        // We cannot just append numbers, as we will end up creating internally reserved names.
        // Make it like _0_<counter> instead.
        tmpname += "0";
    }
    else if (tmpname.back() == '_')
    {
        // The last character is an underscore, so we don't need to add a linking underscore.
        // This would violate double underscore rules.
        use_linked_underscore = false;
    }

    // If there is a collision (very rare),
    // keep tacking on an extra identifier until it's unique.
    do
    {
        counter++;
        name = tmpname + (use_linked_underscore ? "_" : "") + convert_to_string(counter);
    } while (find_name(name));
    insert_name(name);
}
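
// Illustration of the collision handling above (a conceptual sketch; the
// method itself is a protected member):
//
//     // cache = {};        name "foo" -> kept as "foo"
//     // cache = { "foo" }; name "foo" -> renamed to "foo_1"
//     // cache = { "_" };   name "_"   -> renamed to "_0_1", avoiding the
//     //                    internally reserved "_<id>" fallback names.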

void Compiler::update_name_cache(unordered_set<string> &cache, string &name)
{
    update_name_cache(cache, cache, name);
}

void Compiler::set_name(ID id, const std::string &name)
{
    ir.set_name(id, name);
}

const SPIRType &Compiler::get_type(TypeID id) const
{
    return get<SPIRType>(id);
}

const SPIRType &Compiler::get_type_from_variable(VariableID id) const
{
    return get<SPIRType>(get<SPIRVariable>(id).basetype);
}

uint32_t Compiler::get_pointee_type_id(uint32_t type_id) const
{
    auto *p_type = &get<SPIRType>(type_id);
    if (p_type->pointer)
    {
        assert(p_type->parent_type);
        type_id = p_type->parent_type;
    }
    return type_id;
}

const SPIRType &Compiler::get_pointee_type(const SPIRType &type) const
{
    auto *p_type = &type;
    if (p_type->pointer)
    {
        assert(p_type->parent_type);
        p_type = &get<SPIRType>(p_type->parent_type);
    }
    return *p_type;
}

const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const
{
    return get_pointee_type(get<SPIRType>(type_id));
}
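
// Conceptual sketch of the pointer-peeling helpers above (illustration only):
//
//     // Given: %p = OpTypePointer Uniform %T
//     // get_pointee_type_id(id of %p) -> id of %T
//     // get_pointee_type_id(id of %T) -> id of %T   (non-pointers pass through)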

uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const
{
    if (var.phi_variable)
        return var.basetype;
    return get_pointee_type_id(var.basetype);
}

SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var)
{
    return get<SPIRType>(get_variable_data_type_id(var));
}

const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const
{
    return get<SPIRType>(get_variable_data_type_id(var));
}

SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var)
{
    SPIRType *type = &get_variable_data_type(var);
    if (is_array(*type))
        type = &get<SPIRType>(type->parent_type);
    return *type;
}

const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const
{
    const SPIRType *type = &get_variable_data_type(var);
    if (is_array(*type))
        type = &get<SPIRType>(type->parent_type);
    return *type;
}

bool Compiler::is_sampled_image_type(const SPIRType &type)
{
    return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 &&
           type.image.dim != DimBuffer;
}

void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
                                            const std::string &argument)
{
    ir.set_member_decoration_string(id, index, decoration, argument);
}

void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
{
    ir.set_member_decoration(id, index, decoration, argument);
}

void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name)
{
    ir.set_member_name(id, index, name);
}

const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const
{
    return ir.get_member_name(id, index);
}

void Compiler::set_qualified_name(uint32_t id, const string &name)
{
    ir.meta[id].decoration.qualified_alias = name;
}

void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name)
{
    ir.meta[type_id].members.resize(max(ir.meta[type_id].members.size(), size_t(index) + 1));
    ir.meta[type_id].members[index].qualified_alias = name;
}

const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const
{
    auto *m = ir.find_meta(type_id);
    if (m && index < m->members.size())
        return m->members[index].qualified_alias;
    else
        return ir.get_empty_string();
}

uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
    return ir.get_member_decoration(id, index, decoration);
}

const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const
{
    return ir.get_member_decoration_bitset(id, index);
}

bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
    return ir.has_member_decoration(id, index, decoration);
}

void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
{
    ir.unset_member_decoration(id, index, decoration);
}

void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument)
{
    ir.set_decoration_string(id, decoration, argument);
}

void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument)
{
    ir.set_decoration(id, decoration, argument);
}

void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value)
{
    auto &dec = ir.meta[id].decoration;
    dec.extended.flags.set(decoration);
    dec.extended.values[decoration] = value;
}

void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration,
                                              uint32_t value)
{
    ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
    auto &dec = ir.meta[type].members[index];
    dec.extended.flags.set(decoration);
    dec.extended.values[decoration] = value;
}

static uint32_t get_default_extended_decoration(ExtendedDecorations decoration)
{
    switch (decoration)
    {
    case SPIRVCrossDecorationResourceIndexPrimary:
    case SPIRVCrossDecorationResourceIndexSecondary:
    case SPIRVCrossDecorationResourceIndexTertiary:
    case SPIRVCrossDecorationResourceIndexQuaternary:
    case SPIRVCrossDecorationInterfaceMemberIndex:
        return ~(0u);

    default:
        return 0;
    }
}

uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
{
    auto *m = ir.find_meta(id);
    if (!m)
        return 0;

    auto &dec = m->decoration;

    if (!dec.extended.flags.get(decoration))
        return get_default_extended_decoration(decoration);

    return dec.extended.values[decoration];
}

uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
{
    auto *m = ir.find_meta(type);
    if (!m)
        return 0;

    if (index >= m->members.size())
        return 0;

    auto &dec = m->members[index];
    if (!dec.extended.flags.get(decoration))
        return get_default_extended_decoration(decoration);
    return dec.extended.values[decoration];
}

bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
{
    auto *m = ir.find_meta(id);
    if (!m)
        return false;

    auto &dec = m->decoration;
    return dec.extended.flags.get(decoration);
}

bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
{
    auto *m = ir.find_meta(type);
    if (!m)
        return false;

    if (index >= m->members.size())
        return false;

    auto &dec = m->members[index];
    return dec.extended.flags.get(decoration);
}

void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration)
{
    auto &dec = ir.meta[id].decoration;
    dec.extended.flags.clear(decoration);
    dec.extended.values[decoration] = 0;
}

void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration)
{
    ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
    auto &dec = ir.meta[type].members[index];
    dec.extended.flags.clear(decoration);
    dec.extended.values[decoration] = 0;
}

StorageClass Compiler::get_storage_class(VariableID id) const
{
    return get<SPIRVariable>(id).storage;
}

const std::string &Compiler::get_name(ID id) const
{
    return ir.get_name(id);
}

const std::string Compiler::get_fallback_name(ID id) const
{
    return join("_", id);
}

const std::string Compiler::get_block_fallback_name(VariableID id) const
{
    auto &var = get<SPIRVariable>(id);
    if (get_name(id).empty())
        return join("_", get<SPIRType>(var.basetype).self, "_", id);
    else
        return get_name(id);
}

const Bitset &Compiler::get_decoration_bitset(ID id) const
{
    return ir.get_decoration_bitset(id);
}

bool Compiler::has_decoration(ID id, Decoration decoration) const
{
    return ir.has_decoration(id, decoration);
}

const string &Compiler::get_decoration_string(ID id, Decoration decoration) const
{
    return ir.get_decoration_string(id, decoration);
}

const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
{
    return ir.get_member_decoration_string(id, index, decoration);
}

uint32_t Compiler::get_decoration(ID id, Decoration decoration) const
{
    return ir.get_decoration(id, decoration);
}

void Compiler::unset_decoration(ID id, Decoration decoration)
{
    ir.unset_decoration(id, decoration);
}
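
// The decoration accessors above are the main remapping hooks for API users.
// A sketch (assuming `comp` is a Compiler and `res` is a Resource obtained
// from get_shader_resources()):
//
//     uint32_t binding = comp.get_decoration(res.id, spv::DecorationBinding);
//     comp.set_decoration(res.id, spv::DecorationBinding, binding + 10);
//     comp.unset_decoration(res.id, spv::DecorationBinding); // or drop it entirely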

bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const
{
    auto *m = ir.find_meta(id);
    if (!m)
        return false;

    auto &word_offsets = m->decoration_word_offset;
    auto itr = word_offsets.find(decoration);
    if (itr == end(word_offsets))
        return false;

    word_offset = itr->second;
    return true;
}

bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const
{
    // Tried and failed.
    if (block.disable_block_optimization || block.complex_continue)
        return false;

    if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
    {
        // Try to detect common for loop pattern
        // which the code backend can use to create cleaner code.
        // for(;;) { if (cond) { some_body; } else { break; } }
        // is the pattern we're looking for.
        const auto *false_block = maybe_get<SPIRBlock>(block.false_block);
        const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
        const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block);

        bool false_block_is_merge = block.false_block == block.merge_block ||
                                    (false_block && merge_block && execution_is_noop(*false_block, *merge_block));

        bool true_block_is_merge = block.true_block == block.merge_block ||
                                   (true_block && merge_block && execution_is_noop(*true_block, *merge_block));

        bool positive_candidate =
            block.true_block != block.merge_block && block.true_block != block.self && false_block_is_merge;

        bool negative_candidate =
            block.false_block != block.merge_block && block.false_block != block.self && true_block_is_merge;

        bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop &&
                   (positive_candidate || negative_candidate);

        if (ret && positive_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
            ret = block.true_block == block.continue_block;
        else if (ret && negative_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
            ret = block.false_block == block.continue_block;

        // If we have OpPhi which depends on branches which came from our own block,
        // we need to flush phi variables in else block instead of a trivial break,
        // so we cannot assume this is a for loop candidate.
        if (ret)
        {
            for (auto &phi : block.phi_variables)
                if (phi.parent == block.self)
                    return false;

            auto *merge = maybe_get<SPIRBlock>(block.merge_block);
            if (merge)
                for (auto &phi : merge->phi_variables)
                    if (phi.parent == block.self)
                        return false;
        }
        return ret;
    }
    else if (method == SPIRBlock::MergeToDirectForLoop)
    {
        // Empty loop header that just sets up merge target
        // and branches to loop body.
        bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block.ops.empty();

        if (!ret)
            return false;

        auto &child = get<SPIRBlock>(block.next_block);

        const auto *false_block = maybe_get<SPIRBlock>(child.false_block);
        const auto *true_block = maybe_get<SPIRBlock>(child.true_block);
        const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block);

        bool false_block_is_merge = child.false_block == block.merge_block ||
                                    (false_block && merge_block && execution_is_noop(*false_block, *merge_block));

        bool true_block_is_merge = child.true_block == block.merge_block ||
                                   (true_block && merge_block && execution_is_noop(*true_block, *merge_block));

        bool positive_candidate =
            child.true_block != block.merge_block && child.true_block != block.self && false_block_is_merge;

        bool negative_candidate =
            child.false_block != block.merge_block && child.false_block != block.self && true_block_is_merge;

        ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone &&
              (positive_candidate || negative_candidate);

        // If we have OpPhi which depends on branches which came from our own block,
        // we need to flush phi variables in else block instead of a trivial break,
        // so we cannot assume this is a for loop candidate.
        if (ret)
        {
            for (auto &phi : block.phi_variables)
                if (phi.parent == block.self || phi.parent == child.self)
                    return false;

            for (auto &phi : child.phi_variables)
                if (phi.parent == block.self)
                    return false;

            auto *merge = maybe_get<SPIRBlock>(block.merge_block);
            if (merge)
                for (auto &phi : merge->phi_variables)
                    if (phi.parent == block.self || phi.parent == child.false_block)
                        return false;
        }

        return ret;
    }
    else
        return false;
}

bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const
{
    if (!execution_is_branchless(from, to))
        return false;

    auto *start = &from;
    for (;;)
    {
        if (start->self == to.self)
            return true;

        if (!start->ops.empty())
            return false;

        auto &next = get<SPIRBlock>(start->next_block);
        // Flushing phi variables does not count as noop.
        for (auto &phi : next.phi_variables)
            if (phi.parent == start->self)
                return false;

        start = &next;
    }
}

bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const
{
    auto *start = &from;
    for (;;)
    {
        if (start->self == to.self)
            return true;

        if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone)
            start = &get<SPIRBlock>(start->next_block);
        else
            return false;
    }
}

bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const
{
    return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self;
}

SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const
{
    // The block was deemed too complex during code emit, pick conservative fallback paths.
    if (block.complex_continue)
        return SPIRBlock::ComplexLoop;

    // In older glslang output, the continue block can be equal to the loop header.
1583 // In this case, execution is clearly branchless, so just assume a while loop header here.
1584 if (block.merge == SPIRBlock::MergeLoop)
1585 return SPIRBlock::WhileLoop;
1586
1587 if (block.loop_dominator == BlockID(SPIRBlock::NoDominator))
1588 {
1589 // Continue block is never reached from CFG.
1590 return SPIRBlock::ComplexLoop;
1591 }
1592
1593 auto &dominator = get<SPIRBlock>(block.loop_dominator);
1594
1595 if (execution_is_noop(block, dominator))
1596 return SPIRBlock::WhileLoop;
1597 else if (execution_is_branchless(block, dominator))
1598 return SPIRBlock::ForLoop;
1599 else
1600 {
1601 const auto *false_block = maybe_get<SPIRBlock>(block.false_block);
1602 const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
1603 const auto *merge_block = maybe_get<SPIRBlock>(dominator.merge_block);
1604
1605 // If we need to flush Phi in this block, we cannot have a DoWhile loop.
1606 bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block);
1607 bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block);
1608 if (flush_phi_to_false || flush_phi_to_true)
1609 return SPIRBlock::ComplexLoop;
1610
1611 bool positive_do_while = block.true_block == dominator.self &&
1612 (block.false_block == dominator.merge_block ||
1613 (false_block && merge_block && execution_is_noop(*false_block, *merge_block)));
1614
1615 bool negative_do_while = block.false_block == dominator.self &&
1616 (block.true_block == dominator.merge_block ||
1617 (true_block && merge_block && execution_is_noop(*true_block, *merge_block)));
1618
1619 if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select &&
1620 (positive_do_while || negative_do_while))
1621 {
1622 return SPIRBlock::DoWhileLoop;
1623 }
1624 else
1625 return SPIRBlock::ComplexLoop;
1626 }
1627 }
1628
bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
{
	handler.set_current_block(block);
	handler.rearm_current_block(block);

	// Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
	// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
	// inside dead blocks ...
	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		if (!handler.handle(op, ops, i.length))
			return false;

		if (op == OpFunctionCall)
		{
			auto &func = get<SPIRFunction>(ops[2]);
			if (handler.follow_function_call(func))
			{
				if (!handler.begin_function_scope(ops, i.length))
					return false;
				if (!traverse_all_reachable_opcodes(get<SPIRFunction>(ops[2]), handler))
					return false;
				if (!handler.end_function_scope(ops, i.length))
					return false;

				handler.rearm_current_block(block);
			}
		}
	}

	if (!handler.handle_terminator(block))
		return false;

	return true;
}

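// Minimal handler sketch (illustrative only; OpcodeHandler is a nested Compiler type,
// so concrete handlers are defined inside this file like the ones below):
//
// struct LoadCounter : OpcodeHandler
// {
//     uint32_t loads = 0;
//     bool handle(spv::Op op, const uint32_t *, uint32_t) override
//     {
//         if (op == spv::OpLoad)
//             loads++;
//         return true; // Returning false aborts the traversal early.
//     }
// };
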
uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const
{
	auto *type_meta = ir.find_meta(type.self);
	if (type_meta)
	{
		// Decoration must be set in valid SPIR-V, otherwise throw.
		auto &dec = type_meta->members[index];
		if (dec.decoration_flags.get(DecorationOffset))
			return dec.offset;
		else
			SPIRV_CROSS_THROW("Struct member does not have Offset set.");
	}
	else
		SPIRV_CROSS_THROW("Struct member does not have Offset set.");
}

uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const
{
	auto *type_meta = ir.find_meta(type.member_types[index]);
	if (type_meta)
	{
		// Decoration must be set in valid SPIR-V, otherwise throw.
		// ArrayStride is part of the array type, not OpMemberDecorate.
		auto &dec = type_meta->decoration;
		if (dec.decoration_flags.get(DecorationArrayStride))
			return dec.array_stride;
		else
			SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
	}
	else
		SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
}

uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const
{
	auto *type_meta = ir.find_meta(type.self);
	if (type_meta)
	{
		// Decoration must be set in valid SPIR-V, otherwise throw.
		// MatrixStride is part of OpMemberDecorate.
		auto &dec = type_meta->members[index];
		if (dec.decoration_flags.get(DecorationMatrixStride))
			return dec.matrix_stride;
		else
			SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
	}
	else
		SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
}

size_t Compiler::get_declared_struct_size(const SPIRType &type) const
{
	if (type.member_types.empty())
		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");

	uint32_t last = uint32_t(type.member_types.size() - 1);
	size_t offset = type_struct_member_offset(type, last);
	size_t size = get_declared_struct_member_size(type, last);
	return offset + size;
}

size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const
{
	if (type.member_types.empty())
		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");

	size_t size = get_declared_struct_size(type);
	auto &last_type = get<SPIRType>(type.member_types.back());
	if (!last_type.array.empty() && last_type.array_size_literal[0] && last_type.array[0] == 0) // Runtime array
		size += array_size * type_struct_member_array_stride(type, uint32_t(type.member_types.size() - 1));

	return size;
}

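// Worked example (sketch): for an SSBO declared as
//   layout(std430) buffer B { vec4 header; float data[]; };
// the runtime array contributes 0 bytes to get_declared_struct_size(), so it
// returns 16 (the offset where data[] begins), while
// get_declared_struct_size_runtime_array(type, 100) returns 16 + 100 * 4 = 416,
// since data[] has ArrayStride 4 under std430.
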
uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const
{
	auto &result_type = get<SPIRType>(spec.basetype);
	if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int &&
	    result_type.basetype != SPIRType::Boolean)
	{
		SPIRV_CROSS_THROW(
		    "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n");
	}

	if (!is_scalar(result_type))
		SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");

	uint32_t value = 0;

	const auto eval_u32 = [&](uint32_t id) -> uint32_t {
		auto &type = expression_type(id);
		if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != SPIRType::Boolean)
		{
			SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating "
			                  "specialization constants.\n");
		}

		if (!is_scalar(type))
			SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");
		if (const auto *c = this->maybe_get<SPIRConstant>(id))
			return c->scalar();
		else
			return evaluate_spec_constant_u32(this->get<SPIRConstantOp>(id));
	};

#define binary_spec_op(op, binary_op)                                              \
	case Op##op:                                                                   \
		value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \
		break
#define binary_spec_op_cast(op, binary_op, type)                                                         \
	case Op##op:                                                                                         \
		value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \
		break

	// Support the basic opcodes which are typically used when computing array sizes.
	switch (spec.opcode)
	{
		binary_spec_op(IAdd, +);
		binary_spec_op(ISub, -);
		binary_spec_op(IMul, *);
		binary_spec_op(BitwiseAnd, &);
		binary_spec_op(BitwiseOr, |);
		binary_spec_op(BitwiseXor, ^);
		binary_spec_op(LogicalAnd, &);
		binary_spec_op(LogicalOr, |);
		binary_spec_op(ShiftLeftLogical, <<);
		binary_spec_op(ShiftRightLogical, >>);
		binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t);
		binary_spec_op(LogicalEqual, ==);
		binary_spec_op(LogicalNotEqual, !=);
		binary_spec_op(IEqual, ==);
		binary_spec_op(INotEqual, !=);
		binary_spec_op(ULessThan, <);
		binary_spec_op(ULessThanEqual, <=);
		binary_spec_op(UGreaterThan, >);
		binary_spec_op(UGreaterThanEqual, >=);
		binary_spec_op_cast(SLessThan, <, int32_t);
		binary_spec_op_cast(SLessThanEqual, <=, int32_t);
		binary_spec_op_cast(SGreaterThan, >, int32_t);
		binary_spec_op_cast(SGreaterThanEqual, >=, int32_t);
#undef binary_spec_op
#undef binary_spec_op_cast

	case OpLogicalNot:
		value = uint32_t(!eval_u32(spec.arguments[0]));
		break;

	case OpNot:
		value = ~eval_u32(spec.arguments[0]);
		break;

	case OpSNegate:
		value = uint32_t(-int32_t(eval_u32(spec.arguments[0])));
		break;

	case OpSelect:
		value = eval_u32(spec.arguments[0]) ? eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]);
		break;

	case OpUMod:
	{
		uint32_t a = eval_u32(spec.arguments[0]);
		uint32_t b = eval_u32(spec.arguments[1]);
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n");
		value = a % b;
		break;
	}

	case OpSRem:
	{
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n");
		value = a % b;
		break;
	}

	case OpSMod:
	{
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n");
		auto v = a % b;

		// Makes sure we match the sign of b, not a.
		if ((b < 0 && v > 0) || (b > 0 && v < 0))
			v += b;
		value = v;
		break;
	}

	case OpUDiv:
	{
		uint32_t a = eval_u32(spec.arguments[0]);
		uint32_t b = eval_u32(spec.arguments[1]);
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n");
		value = a / b;
		break;
	}

	case OpSDiv:
	{
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n");
		value = a / b;
		break;
	}

	default:
		SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n");
	}

	return value;
}

uint32_t Compiler::evaluate_constant_u32(uint32_t id) const
{
	if (const auto *c = maybe_get<SPIRConstant>(id))
		return c->scalar();
	else
		return evaluate_spec_constant_u32(get<SPIRConstantOp>(id));
}

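// Worked example (sketch): given
//   layout(constant_id = 0) const int N = 4;
//   float arr[N + 1];
// the array size is an OpSpecConstantOp IAdd taking the spec constant N and the
// literal constant 1, so evaluate_constant_u32() on the array size ID returns 5
// when N keeps its default value.
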
size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
{
	if (struct_type.member_types.empty())
		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");

	auto &flags = get_member_decoration_bitset(struct_type.self, index);
	auto &type = get<SPIRType>(struct_type.member_types[index]);

	switch (type.basetype)
	{
	case SPIRType::Unknown:
	case SPIRType::Void:
	case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types.
	case SPIRType::AtomicCounter:
	case SPIRType::Image:
	case SPIRType::SampledImage:
	case SPIRType::Sampler:
		SPIRV_CROSS_THROW("Querying size for object with opaque size.");

	default:
		break;
	}

	if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
	{
		// Check if this is a top-level pointer type, and not an array of pointers.
		if (type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth)
			return 8;
	}

	if (!type.array.empty())
	{
		// For arrays, we can use ArrayStride to get an easy check.
		bool array_size_literal = type.array_size_literal.back();
		uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(type.array.back());
		return type_struct_member_array_stride(struct_type, index) * array_size;
	}
	else if (type.basetype == SPIRType::Struct)
	{
		return get_declared_struct_size(type);
	}
	else
	{
		unsigned vecsize = type.vecsize;
		unsigned columns = type.columns;

		// Vectors.
		if (columns == 1)
		{
			size_t component_size = type.width / 8;
			return vecsize * component_size;
		}
		else
		{
			uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index);

			// Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
			if (flags.get(DecorationRowMajor))
				return matrix_stride * vecsize;
			else if (flags.get(DecorationColMajor))
				return matrix_stride * columns;
			else
				SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices.");
		}
	}
}

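// Worked example (sketch): a std140 mat3 member has MatrixStride 16, since its
// vec3 columns are padded out to vec4. Declared ColMajor, its size is
// 16 * 3 columns = 48 bytes; declared RowMajor, it is 16 * vecsize (3 rows) = 48
// bytes as well. The two only diverge for non-square matrices.
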
bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain && opcode != OpPtrAccessChain)
		return true;

	bool ptr_chain = (opcode == OpPtrAccessChain);

	// Invalid SPIR-V.
	if (length < (ptr_chain ? 5u : 4u))
		return false;

	if (args[2] != id)
		return true;

	// Don't bother traversing the entire access chain tree yet.
	// If we access a struct member, assume we access the entire member.
	uint32_t index = compiler.get<SPIRConstant>(args[ptr_chain ? 4 : 3]).scalar();

	// Seen this index already.
	if (seen.find(index) != end(seen))
		return true;
	seen.insert(index);

	auto &type = compiler.expression_type(id);
	uint32_t offset = compiler.type_struct_member_offset(type, index);

	size_t range;
	// If we have another member in the struct, deduce the range by looking at the next member.
	// This is okay since structs in SPIR-V can have padding, but Offset decoration must be
	// monotonically increasing.
	// Of course, this doesn't take into account if the SPIR-V for some reason decided to add
	// very large amounts of padding, but that's not really a big deal.
	if (index + 1 < type.member_types.size())
	{
		range = compiler.type_struct_member_offset(type, index + 1) - offset;
	}
	else
	{
		// No padding, so just deduce it from the size of the member directly.
		range = compiler.get_declared_struct_member_size(type, index);
	}

	ranges.push_back({ index, offset, range });
	return true;
}

SmallVector<BufferRange> Compiler::get_active_buffer_ranges(VariableID id) const
{
	SmallVector<BufferRange> ranges;
	BufferAccessHandler handler(*this, ranges, id);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
	return ranges;
}

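// Typical reflection usage (sketch):
//
// auto ranges = compiler.get_active_buffer_ranges(resource.id);
// for (auto &range : ranges)
// {
//     // range.index is the struct member index; offset/range are in bytes.
//     printf("member %u accessed at [%zu, %zu)\n",
//            range.index, range.offset, range.offset + range.range);
// }
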
bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const
{
	if (a.basetype != b.basetype)
		return false;
	if (a.width != b.width)
		return false;
	if (a.vecsize != b.vecsize)
		return false;
	if (a.columns != b.columns)
		return false;
	if (a.array.size() != b.array.size())
		return false;

	size_t array_count = a.array.size();
	if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0)
		return false;

	if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage)
	{
		if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0)
			return false;
	}

	if (a.member_types.size() != b.member_types.size())
		return false;

	size_t member_types = a.member_types.size();
	for (size_t i = 0; i < member_types; i++)
	{
		if (!types_are_logically_equivalent(get<SPIRType>(a.member_types[i]), get<SPIRType>(b.member_types[i])))
			return false;
	}

	return true;
}

const Bitset &Compiler::get_execution_mode_bitset() const
{
	return get_entry_point().flags;
}

void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2)
{
	auto &execution = get_entry_point();

	execution.flags.set(mode);
	switch (mode)
	{
	case ExecutionModeLocalSize:
		execution.workgroup_size.x = arg0;
		execution.workgroup_size.y = arg1;
		execution.workgroup_size.z = arg2;
		break;

	case ExecutionModeInvocations:
		execution.invocations = arg0;
		break;

	case ExecutionModeOutputVertices:
		execution.output_vertices = arg0;
		break;

	default:
		break;
	}
}

void Compiler::unset_execution_mode(ExecutionMode mode)
{
	auto &execution = get_entry_point();
	execution.flags.clear(mode);
}

uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y,
                                                                SpecializationConstant &z) const
{
	auto &execution = get_entry_point();
	x = { 0, 0 };
	y = { 0, 0 };
	z = { 0, 0 };

	if (execution.workgroup_size.constant != 0)
	{
		auto &c = get<SPIRConstant>(execution.workgroup_size.constant);

		if (c.m.c[0].id[0] != ID(0))
		{
			x.id = c.m.c[0].id[0];
			x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId);
		}

		if (c.m.c[0].id[1] != ID(0))
		{
			y.id = c.m.c[0].id[1];
			y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId);
		}

		if (c.m.c[0].id[2] != ID(0))
		{
			z.id = c.m.c[0].id[2];
			z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId);
		}
	}

	return execution.workgroup_size.constant;
}

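// Typical usage (sketch): prefer the specialization constants when present,
// otherwise fall back to the literal LocalSize arguments.
//
// SpecializationConstant x, y, z;
// compiler.get_work_group_size_specialization_constants(x, y, z);
// uint32_t size_x = x.id != ID(0) ? compiler.get_constant(x.id).scalar()
//                                 : compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 0);
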
uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const
{
	auto &execution = get_entry_point();
	switch (mode)
	{
	case ExecutionModeLocalSize:
		switch (index)
		{
		case 0:
			return execution.workgroup_size.x;
		case 1:
			return execution.workgroup_size.y;
		case 2:
			return execution.workgroup_size.z;
		default:
			return 0;
		}

	case ExecutionModeInvocations:
		return execution.invocations;

	case ExecutionModeOutputVertices:
		return execution.output_vertices;

	default:
		return 0;
	}
}

ExecutionModel Compiler::get_execution_model() const
{
	auto &execution = get_entry_point();
	return execution.model;
}

bool Compiler::is_tessellation_shader(ExecutionModel model)
{
	return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
}

bool Compiler::is_vertex_like_shader() const
{
	auto model = get_execution_model();
	return model == ExecutionModelVertex || model == ExecutionModelGeometry ||
	       model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
}

bool Compiler::is_tessellation_shader() const
{
	return is_tessellation_shader(get_execution_model());
}

void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable)
{
	get<SPIRVariable>(id).remapped_variable = remap_enable;
}

bool Compiler::get_remapped_variable_state(VariableID id) const
{
	return get<SPIRVariable>(id).remapped_variable;
}

void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components)
{
	get<SPIRVariable>(id).remapped_components = components;
}

uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const
{
	return get<SPIRVariable>(id).remapped_components;
}

void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source)
{
	auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
	if (itr == end(e.implied_read_expressions))
		e.implied_read_expressions.push_back(source);
}

void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source)
{
	auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
	if (itr == end(e.implied_read_expressions))
		e.implied_read_expressions.push_back(source);
}

void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression)
{
	// Don't inherit any expression dependencies if the expression in dst
	// is not a forwarded temporary.
	if (forwarded_temporaries.find(dst) == end(forwarded_temporaries) ||
	    forced_temporaries.find(dst) != end(forced_temporaries))
	{
		return;
	}

	auto &e = get<SPIRExpression>(dst);
	auto *phi = maybe_get<SPIRVariable>(source_expression);
	if (phi && phi->phi_variable)
	{
		// We have used a phi variable, which can change at the end of the block,
		// so make sure we take a dependency on this phi variable.
		phi->dependees.push_back(dst);
	}

	auto *s = maybe_get<SPIRExpression>(source_expression);
	if (!s)
		return;

	auto &e_deps = e.expression_dependencies;
	auto &s_deps = s->expression_dependencies;

	// If we depend on an expression, we also depend on all sub-dependencies from source.
	e_deps.push_back(source_expression);
	e_deps.insert(end(e_deps), begin(s_deps), end(s_deps));

	// Eliminate duplicated dependencies.
	sort(begin(e_deps), end(e_deps));
	e_deps.erase(unique(begin(e_deps), end(e_deps)), end(e_deps));
}

SmallVector<EntryPoint> Compiler::get_entry_points_and_stages() const
{
	SmallVector<EntryPoint> entries;
	for (auto &entry : ir.entry_points)
		entries.push_back({ entry.second.orig_name, entry.second.model });
	return entries;
}

void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model)
{
	auto &entry = get_entry_point(old_name, model);
	entry.orig_name = new_name;
	entry.name = new_name;
}

void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model)
{
	auto &entry = get_entry_point(name, model);
	ir.default_entry_point = entry.self;
}

SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name)
{
	auto itr = find_if(
	    begin(ir.entry_points), end(ir.entry_points),
	    [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });

	if (itr == end(ir.entry_points))
		SPIRV_CROSS_THROW("Entry point does not exist.");

	return itr->second;
}

const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const
{
	auto itr = find_if(
	    begin(ir.entry_points), end(ir.entry_points),
	    [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });

	if (itr == end(ir.entry_points))
		SPIRV_CROSS_THROW("Entry point does not exist.");

	return itr->second;
}

SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model)
{
	auto itr = find_if(begin(ir.entry_points), end(ir.entry_points),
	                   [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
		                   return entry.second.orig_name == name && entry.second.model == model;
	                   });

	if (itr == end(ir.entry_points))
		SPIRV_CROSS_THROW("Entry point does not exist.");

	return itr->second;
}

const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const
{
	auto itr = find_if(begin(ir.entry_points), end(ir.entry_points),
	                   [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
		                   return entry.second.orig_name == name && entry.second.model == model;
	                   });

	if (itr == end(ir.entry_points))
		SPIRV_CROSS_THROW("Entry point does not exist.");

	return itr->second;
}

const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const
{
	return get_entry_point(name, model).name;
}

const SPIREntryPoint &Compiler::get_entry_point() const
{
	return ir.entry_points.find(ir.default_entry_point)->second;
}

SPIREntryPoint &Compiler::get_entry_point()
{
	return ir.entry_points.find(ir.default_entry_point)->second;
}

bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const
{
	auto &var = get<SPIRVariable>(id);

	if (ir.get_spirv_version() < 0x10400)
	{
		if (var.storage != StorageClassInput && var.storage != StorageClassOutput &&
		    var.storage != StorageClassUniformConstant)
			SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface.");

		// This is to avoid potential problems with very old glslang versions which did
		// not emit input/output interfaces properly.
		// We can assume they only had a single entry point, and single entry point
		// shaders could easily be assumed to use every interface variable anyways.
		if (ir.entry_points.size() <= 1)
			return true;
	}

	// In SPIR-V 1.4 and later, all global resource variables must be present.

	auto &execution = get_entry_point();
	return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) !=
	       end(execution.interface_variables);
}

void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args,
                                                                  uint32_t length)
{
	// If possible, pipe through a remapping table so that parameters know
	// which variables they actually bind to in this scope.
	unordered_map<uint32_t, uint32_t> remapping;
	for (uint32_t i = 0; i < length; i++)
		remapping[func.arguments[i].id] = remap_parameter(args[i]);
	parameter_remapping.push(move(remapping));
}

void Compiler::CombinedImageSamplerHandler::pop_remap_parameters()
{
	parameter_remapping.pop();
}

uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id)
{
	auto *var = compiler.maybe_get_backing_variable(id);
	if (var)
		id = var->self;

	if (parameter_remapping.empty())
		return id;

	auto &remapping = parameter_remapping.top();
	auto itr = remapping.find(id);
	if (itr != end(remapping))
		return itr->second;
	else
		return id;
}

bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
	if (length < 3)
		return false;

	auto &callee = compiler.get<SPIRFunction>(args[2]);
	args += 3;
	length -= 3;
	push_remap_parameters(callee, args, length);
	functions.push(&callee);
	return true;
}

bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length)
{
	if (length < 3)
		return false;

	auto &callee = compiler.get<SPIRFunction>(args[2]);
	args += 3;

	// There are two kinds of cases we have to handle:
	// a callee might call sampler2D(texture2D, sampler) directly, where
	// one or more of the operands originate from our own parameters.
	// Alternatively, we need to provide combined image samplers to our callees,
	// in which case we need to add those as well.

	pop_remap_parameters();

	// Our callee has now been processed at least once.
	// No point in doing it again.
	callee.do_combined_parameters = false;

	auto &params = functions.top()->combined_parameters;
	functions.pop();
	if (functions.empty())
		return true;

	auto &caller = *functions.top();
	if (caller.do_combined_parameters)
	{
		for (auto &param : params)
		{
			VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]);
			VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]);

			auto *i = compiler.maybe_get_backing_variable(image_id);
			auto *s = compiler.maybe_get_backing_variable(sampler_id);
			if (i)
				image_id = i->self;
			if (s)
				sampler_id = s->self;

			register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth);
		}
	}

	return true;
}

void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller,
                                                                            VariableID combined_module_id,
                                                                            VariableID image_id, VariableID sampler_id,
                                                                            bool depth)
{
	// We now have a texture ID and a sampler ID which will either be found as a global
	// or a parameter in our own function. If both are global, they will not need a parameter,
	// otherwise, add it to our list.
	SPIRFunction::CombinedImageSamplerParameter param = {
		0u, image_id, sampler_id, true, true, depth,
	};

	auto texture_itr = find_if(begin(caller.arguments), end(caller.arguments),
	                           [image_id](const SPIRFunction::Parameter &p) { return p.id == image_id; });
	auto sampler_itr = find_if(begin(caller.arguments), end(caller.arguments),
	                           [sampler_id](const SPIRFunction::Parameter &p) { return p.id == sampler_id; });

	if (texture_itr != end(caller.arguments))
	{
		param.global_image = false;
		param.image_id = uint32_t(texture_itr - begin(caller.arguments));
	}

	if (sampler_itr != end(caller.arguments))
	{
		param.global_sampler = false;
		param.sampler_id = uint32_t(sampler_itr - begin(caller.arguments));
	}

	if (param.global_image && param.global_sampler)
		return;

	auto itr = find_if(begin(caller.combined_parameters), end(caller.combined_parameters),
	                   [&param](const SPIRFunction::CombinedImageSamplerParameter &p) {
		                   return param.image_id == p.image_id && param.sampler_id == p.sampler_id &&
		                          param.global_image == p.global_image && param.global_sampler == p.global_sampler;
	                   });

	if (itr == end(caller.combined_parameters))
	{
		uint32_t id = compiler.ir.increase_bound_by(3);
		auto type_id = id + 0;
		auto ptr_type_id = id + 1;
		auto combined_id = id + 2;
		auto &base = compiler.expression_type(image_id);
		auto &type = compiler.set<SPIRType>(type_id);
		auto &ptr_type = compiler.set<SPIRType>(ptr_type_id);

		type = base;
		type.self = type_id;
		type.basetype = SPIRType::SampledImage;
		type.pointer = false;
		type.storage = StorageClassGeneric;
		type.image.depth = depth;

		ptr_type = type;
		ptr_type.pointer = true;
		ptr_type.storage = StorageClassUniformConstant;
		ptr_type.parent_type = type_id;

		// Build new variable.
		compiler.set<SPIRVariable>(combined_id, ptr_type_id, StorageClassFunction, 0);

		// Inherit RelaxedPrecision.
		// If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
		bool relaxed_precision =
		    compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) ||
		    compiler.has_decoration(image_id, DecorationRelaxedPrecision) ||
		    (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));

		if (relaxed_precision)
			compiler.set_decoration(combined_id, DecorationRelaxedPrecision);

		param.id = combined_id;

		compiler.set_name(combined_id,
		                  join("SPIRV_Cross_Combined", compiler.to_name(image_id), compiler.to_name(sampler_id)));

		caller.combined_parameters.push_back(param);
		caller.shadow_arguments.push_back({ ptr_type_id, combined_id, 0u, 0u, true });
	}
}

bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	if (need_dummy_sampler)
	{
		// No need to traverse further, we know the result.
		return false;
	}

	switch (opcode)
	{
	case OpLoad:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		bool separate_image =
		    type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;

		// If not separate image, don't bother.
		if (!separate_image)
			return true;

		uint32_t id = args[1];
		uint32_t ptr = args[2];
		compiler.set<SPIRExpression>(id, "", result_type, true);
		compiler.register_read(id, ptr, true);
		break;
	}

	case OpImageFetch:
	case OpImageQuerySizeLod:
	case OpImageQuerySize:
	case OpImageQueryLevels:
	case OpImageQuerySamples:
	{
		// If we are fetching or querying LOD from a plain OpTypeImage, we must pre-combine with our dummy sampler.
		auto *var = compiler.maybe_get_backing_variable(args[2]);
		if (var)
		{
			auto &type = compiler.get<SPIRType>(var->basetype);
			if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
				need_dummy_sampler = true;
		}

		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];
		auto &type = compiler.get<SPIRType>(result_type);
		bool separate_image =
		    type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
		if (!separate_image)
			return true;

		uint32_t id = args[1];
		uint32_t ptr = args[2];
		compiler.set<SPIRExpression>(id, "", result_type, true);
		compiler.register_read(id, ptr, true);

		// Other backends might use SPIRAccessChain for this later.
		compiler.ir.ids[id].set_allow_type_rewrite();
		break;
	}

	default:
		break;
	}

	return true;
}

bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	// We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need.
	bool is_fetch = false;

	switch (opcode)
	{
	case OpLoad:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
		bool separate_sampler = type.basetype == SPIRType::Sampler;

		// If not separate image or sampler, don't bother.
		if (!separate_image && !separate_sampler)
			return true;

		uint32_t id = args[1];
		uint32_t ptr = args[2];
		compiler.set<SPIRExpression>(id, "", result_type, true);
		compiler.register_read(id, ptr, true);
		return true;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		// Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially
		// impossible to implement, since we don't know which concrete sampler we are accessing.
		// One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds,
		// but this seems ridiculously complicated for a problem which is easy to work around.
		// Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense.

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
		bool separate_sampler = type.basetype == SPIRType::Sampler;
		if (separate_sampler)
			SPIRV_CROSS_THROW(
			    "Attempting to use arrays or structs of separate samplers. This is not possible to statically "
			    "remap to plain GLSL.");

		if (separate_image)
		{
			uint32_t id = args[1];
			uint32_t ptr = args[2];
			compiler.set<SPIRExpression>(id, "", result_type, true);
			compiler.register_read(id, ptr, true);
		}
		return true;
	}

	case OpImageFetch:
	case OpImageQuerySizeLod:
	case OpImageQuerySize:
	case OpImageQueryLevels:
	case OpImageQuerySamples:
	{
		// If we are fetching from a plain OpTypeImage or querying LOD, we must pre-combine with our dummy sampler.
		auto *var = compiler.maybe_get_backing_variable(args[2]);
		if (!var)
			return true;

		auto &type = compiler.get<SPIRType>(var->basetype);
		if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
		{
			if (compiler.dummy_sampler_id == 0)
				SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with "
				                  "build_dummy_sampler_for_combined_images().");

			// Do it outside.
			is_fetch = true;
			break;
		}

		return true;
	}

	case OpSampledImage:
		// Do it outside.
		break;

	default:
		return true;
	}

	// Register sampler2D calls used in case they are parameters, so
	// that their callees know which combined image samplers to propagate down the call stack.
	if (!functions.empty())
	{
		auto &callee = *functions.top();
		if (callee.do_combined_parameters)
		{
			uint32_t image_id = args[2];

			auto *image = compiler.maybe_get_backing_variable(image_id);
			if (image)
				image_id = image->self;

			uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : args[3];
			auto *sampler = compiler.maybe_get_backing_variable(sampler_id);
			if (sampler)
				sampler_id = sampler->self;

			uint32_t combined_id = args[1];

			auto &combined_type = compiler.get<SPIRType>(args[0]);
			register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth);
		}
	}

	// For function calls, we need to remap IDs which are function parameters into global variables.
	// This information is statically known from the current place in the call stack.
	// Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know
	// which backing variable the image/sample came from.
	VariableID image_id = remap_parameter(args[2]);
	VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]);

	auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers),
	                   [image_id, sampler_id](const CombinedImageSampler &combined) {
		                   return combined.image_id == image_id && combined.sampler_id == sampler_id;
	                   });

	if (itr == end(compiler.combined_image_samplers))
	{
		uint32_t sampled_type;
		uint32_t combined_module_id;
		if (is_fetch)
		{
			// Have to invent the sampled image type.
			sampled_type = compiler.ir.increase_bound_by(1);
			auto &type = compiler.set<SPIRType>(sampled_type);
			type = compiler.expression_type(args[2]);
			type.self = sampled_type;
			type.basetype = SPIRType::SampledImage;
			type.image.depth = false;
			combined_module_id = 0;
		}
		else
		{
			sampled_type = args[0];
			combined_module_id = args[1];
		}

		auto id = compiler.ir.increase_bound_by(2);
		auto type_id = id + 0;
		auto combined_id = id + 1;

		// Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type.
		// We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes.
		auto &type = compiler.set<SPIRType>(type_id);
		auto &base = compiler.get<SPIRType>(sampled_type);
		type = base;
		type.pointer = true;
		type.storage = StorageClassUniformConstant;
		type.parent_type = type_id;

		// Build new variable.
		compiler.set<SPIRVariable>(combined_id, type_id, StorageClassUniformConstant, 0);

		// Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
		// If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
		bool relaxed_precision =
		    (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) ||
		    (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) ||
		    (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));

		if (relaxed_precision)
			compiler.set_decoration(combined_id, DecorationRelaxedPrecision);

		// Propagate the array type for the original image as well.
		auto *var = compiler.maybe_get_backing_variable(image_id);
		if (var)
		{
			auto &parent_type = compiler.get<SPIRType>(var->basetype);
			type.array = parent_type.array;
			type.array_size_literal = parent_type.array_size_literal;
		}

		compiler.combined_image_samplers.push_back({ combined_id, image_id, sampler_id });
	}

	return true;
}

VariableID Compiler::build_dummy_sampler_for_combined_images()
{
	DummySamplerForCombinedImageHandler handler(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
	if (handler.need_dummy_sampler)
	{
		uint32_t offset = ir.increase_bound_by(3);
		auto type_id = offset + 0;
		auto ptr_type_id = offset + 1;
		auto var_id = offset + 2;

		auto &sampler = set<SPIRType>(type_id);
		sampler.basetype = SPIRType::Sampler;

		auto &ptr_sampler = set<SPIRType>(ptr_type_id);
		ptr_sampler = sampler;
		ptr_sampler.self = type_id;
		ptr_sampler.storage = StorageClassUniformConstant;
		ptr_sampler.pointer = true;
		ptr_sampler.parent_type = type_id;

		set<SPIRVariable>(var_id, ptr_type_id, StorageClassUniformConstant, 0);
		set_name(var_id, "SPIRV_Cross_DummySampler");
		dummy_sampler_id = var_id;
		return var_id;
	}
	else
		return 0;
}

void Compiler::build_combined_image_samplers()
{
	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.combined_parameters.clear();
		func.shadow_arguments.clear();
		func.do_combined_parameters = true;
	});

	combined_image_samplers.clear();
	CombinedImageSamplerHandler handler(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
}

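// Typical usage (sketch), mirroring the remap workflow from the main README:
//
// compiler.build_dummy_sampler_for_combined_images(); // Only needed when texelFetch is used on separate images.
// compiler.build_combined_image_samplers();
// for (auto &remap : compiler.get_combined_image_samplers())
// {
//     // Give each synthesized combined sampler a deterministic name based on its sources.
//     compiler.set_name(remap.combined_id,
//                       join("SPIRV_Cross_Combined", compiler.get_name(remap.image_id),
//                            compiler.get_name(remap.sampler_id)));
// }
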
SmallVector<SpecializationConstant> Compiler::get_specialization_constants() const
{
	SmallVector<SpecializationConstant> spec_consts;
	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, const SPIRConstant &c) {
		if (c.specialization && has_decoration(c.self, DecorationSpecId))
			spec_consts.push_back({ c.self, get_decoration(c.self, DecorationSpecId) });
	});
	return spec_consts;
}

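// Typical usage (sketch): enumerate spec constants and read their default values.
//
// for (auto &sc : compiler.get_specialization_constants())
// {
//     auto &c = compiler.get_constant(sc.id);
//     printf("constant_id %u defaults to %u\n", sc.constant_id, c.scalar());
// }
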
SPIRConstant &Compiler::get_constant(ConstantID id)
{
	return get<SPIRConstant>(id);
}

const SPIRConstant &Compiler::get_constant(ConstantID id) const
{
	return get<SPIRConstant>(id);
}

static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set<uint32_t> &blocks,
                                             unordered_set<uint32_t> &visit_cache)
{
	// This block accesses the variable.
	if (blocks.find(block) != end(blocks))
		return false;

	// We are at the end of the CFG.
	if (cfg.get_succeeding_edges(block).empty())
		return true;

	// If any of our successors have a path to the end, there exists a path from block.
	for (auto &succ : cfg.get_succeeding_edges(block))
	{
		if (visit_cache.count(succ) == 0)
		{
			if (exists_unaccessed_path_to_return(cfg, succ, blocks, visit_cache))
				return true;
			visit_cache.insert(succ);
		}
	}

	return false;
}

void Compiler::analyze_parameter_preservation(
    SPIRFunction &entry, const CFG &cfg, const unordered_map<uint32_t, unordered_set<uint32_t>> &variable_to_blocks,
    const unordered_map<uint32_t, unordered_set<uint32_t>> &complete_write_blocks)
{
	for (auto &arg : entry.arguments)
	{
		// Non-pointers are always inputs.
		auto &type = get<SPIRType>(arg.type);
		if (!type.pointer)
			continue;

		// Opaque argument types are always input-only.
		bool potential_preserve;
		switch (type.basetype)
		{
		case SPIRType::Sampler:
		case SPIRType::Image:
		case SPIRType::SampledImage:
		case SPIRType::AtomicCounter:
			potential_preserve = false;
			break;

		default:
			potential_preserve = true;
			break;
		}

		if (!potential_preserve)
			continue;

		auto itr = variable_to_blocks.find(arg.id);
		if (itr == end(variable_to_blocks))
		{
			// Variable is never accessed.
			continue;
		}

		// We have accessed a variable, but there were no complete writes to that variable.
		// We deduce that we must preserve the argument.
		itr = complete_write_blocks.find(arg.id);
		if (itr == end(complete_write_blocks))
		{
			arg.read_count++;
			continue;
		}

		// If there is a path through the CFG where no block completely writes to the variable, the variable will be in an undefined state
		// when the function returns. We therefore need to implicitly preserve the variable in case there are writers in the function.
		// The major case here is a function like
		// void foo(int &var) { if (cond) var = 10; }
		// Using read/write counts, we will think it's just an out variable, but it really needs to be inout,
		// because if we don't write anything, whatever we put into the function must return back to the caller.
		unordered_set<uint32_t> visit_cache;
		if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second, visit_cache))
			arg.read_count++;
	}
}

Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_,
                                                                               SPIRFunction &entry_)
    : compiler(compiler_)
    , entry(entry_)
{
}

bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &)
{
	// Only analyze within this function.
	return false;
}

void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block)
{
	current_block = &block;

	// If we're branching to a block which uses OpPhi, in GLSL
	// this will be a variable write when we branch,
	// so we need to track access to these variables as well to
	// have a complete picture.
	const auto test_phi = [this, &block](uint32_t to) {
		auto &next = compiler.get<SPIRBlock>(to);
		for (auto &phi : next.phi_variables)
		{
			if (phi.parent == block.self)
			{
				accessed_variables_to_block[phi.function_variable].insert(block.self);
				// Phi variables are also accessed in our target branch block.
				accessed_variables_to_block[phi.function_variable].insert(next.self);

				notify_variable_access(phi.local_variable, block.self);
			}
		}
	};

	switch (block.terminator)
	{
	case SPIRBlock::Direct:
		notify_variable_access(block.condition, block.self);
		test_phi(block.next_block);
		break;

	case SPIRBlock::Select:
		notify_variable_access(block.condition, block.self);
		test_phi(block.true_block);
		test_phi(block.false_block);
		break;

	case SPIRBlock::MultiSelect:
		notify_variable_access(block.condition, block.self);
		for (auto &target : block.cases)
			test_phi(target.block);
		if (block.default_block)
			test_phi(block.default_block);
		break;

	default:
		break;
	}
}

void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block)
{
	if (id == 0)
		return;

	// Access chains used in multiple blocks mean we have to hoist all the variables
	// used to construct the access chain, as not all backends can use pointers.
	auto itr = access_chain_children.find(id);
	if (itr != end(access_chain_children))
		for (auto child_id : itr->second)
			notify_variable_access(child_id, block);

	if (id_is_phi_variable(id))
		accessed_variables_to_block[id].insert(block);
	else if (id_is_potential_temporary(id))
		accessed_temporaries_to_block[id].insert(block);
}

bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const
{
	if (id >= compiler.get_current_id_bound())
		return false;
	auto *var = compiler.maybe_get<SPIRVariable>(id);
	return var && var->phi_variable;
}

bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const
{
	if (id >= compiler.get_current_id_bound())
		return false;

	// Temporaries are not created before we start emitting code.
	return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression);
}

bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBlock &block)
{
	switch (block.terminator)
	{
	case SPIRBlock::Return:
		if (block.return_value)
			notify_variable_access(block.return_value, block.self);
		break;

	case SPIRBlock::Select:
	case SPIRBlock::MultiSelect:
		notify_variable_access(block.condition, block.self);
		break;

	default:
		break;
	}

	return true;
}

handle(spv::Op op,const uint32_t * args,uint32_t length)3082 bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
3083 {
3084 // Keep track of the types of temporaries, so we can hoist them out as necessary.
3085 uint32_t result_type, result_id;
3086 if (compiler.instruction_to_result_type(result_type, result_id, op, args, length))
3087 result_id_to_type[result_id] = result_type;
3088
3089 switch (op)
3090 {
3091 case OpStore:
3092 {
3093 if (length < 2)
3094 return false;
3095
3096 ID ptr = args[0];
3097 auto *var = compiler.maybe_get_backing_variable(ptr);
3098
3099 // If we store through an access chain, we have a partial write.
3100 if (var)
3101 {
3102 accessed_variables_to_block[var->self].insert(current_block->self);
3103 if (var->self == ptr)
3104 complete_write_variables_to_block[var->self].insert(current_block->self);
3105 else
3106 partial_write_variables_to_block[var->self].insert(current_block->self);
3107 }
3108
3109 // args[0] might be an access chain we have to track use of.
3110 notify_variable_access(args[0], current_block->self);
3111 // Might try to store a Phi variable here.
3112 notify_variable_access(args[1], current_block->self);
3113 break;
3114 }
3115
3116 case OpAccessChain:
3117 case OpInBoundsAccessChain:
3118 case OpPtrAccessChain:
3119 {
3120 if (length < 3)
3121 return false;
3122
3123 // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers.
3124 uint32_t ptr = args[2];
3125 auto *var = compiler.maybe_get<SPIRVariable>(ptr);
3126 if (var)
3127 {
3128 accessed_variables_to_block[var->self].insert(current_block->self);
3129 access_chain_children[args[1]].insert(var->self);
3130 }
3131
3132 // args[2] might be another access chain we have to track use of.
3133 for (uint32_t i = 2; i < length; i++)
3134 {
3135 notify_variable_access(args[i], current_block->self);
3136 access_chain_children[args[1]].insert(args[i]);
3137 }
3138
3139 // Also keep track of the access chain pointer itself.
3140 // In exceptionally rare cases, we can end up with a case where
3141 // the access chain is generated in the loop body, but is consumed in continue block.
3142 // This means we need complex loop workarounds, and we must detect this via CFG analysis.
3143 notify_variable_access(args[1], current_block->self);
3144
3145 // The result of an access chain is a fixed expression and is not really considered a temporary.
3146 auto &e = compiler.set<SPIRExpression>(args[1], "", args[0], true);
3147 auto *backing_variable = compiler.maybe_get_backing_variable(ptr);
3148 e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0);
3149
3150 // Other backends might use SPIRAccessChain for this later.
3151 compiler.ir.ids[args[1]].set_allow_type_rewrite();
3152 access_chain_expressions.insert(args[1]);
3153 break;
3154 }
3155
3156 case OpCopyMemory:
3157 {
3158 if (length < 2)
3159 return false;
3160
3161 ID lhs = args[0];
3162 ID rhs = args[1];
3163 auto *var = compiler.maybe_get_backing_variable(lhs);
3164
3165 // If we store through an access chain, we have a partial write.
3166 if (var)
3167 {
3168 accessed_variables_to_block[var->self].insert(current_block->self);
3169 if (var->self == lhs)
3170 complete_write_variables_to_block[var->self].insert(current_block->self);
3171 else
3172 partial_write_variables_to_block[var->self].insert(current_block->self);
3173 }
3174
3175 // args[0:1] might be access chains we have to track use of.
3176 for (uint32_t i = 0; i < 2; i++)
3177 notify_variable_access(args[i], current_block->self);
3178
3179 var = compiler.maybe_get_backing_variable(rhs);
3180 if (var)
3181 accessed_variables_to_block[var->self].insert(current_block->self);
3182 break;
3183 }
3184
3185 case OpCopyObject:
3186 {
3187 if (length < 3)
3188 return false;
3189
3190 auto *var = compiler.maybe_get_backing_variable(args[2]);
3191 if (var)
3192 accessed_variables_to_block[var->self].insert(current_block->self);
3193
3194 // Might be an access chain which we have to keep track of.
3195 notify_variable_access(args[1], current_block->self);
3196 if (access_chain_expressions.count(args[2]))
3197 access_chain_expressions.insert(args[1]);
3198
3199 // Might try to copy a Phi variable here.
3200 notify_variable_access(args[2], current_block->self);
3201 break;
3202 }
3203
3204 case OpLoad:
3205 {
3206 if (length < 3)
3207 return false;
3208 uint32_t ptr = args[2];
3209 auto *var = compiler.maybe_get_backing_variable(ptr);
3210 if (var)
3211 accessed_variables_to_block[var->self].insert(current_block->self);
3212
3213 // Loaded value is a temporary.
3214 notify_variable_access(args[1], current_block->self);
3215
3216 // Might be an access chain we have to track use of.
3217 notify_variable_access(args[2], current_block->self);
3218 break;
3219 }
3220
3221 case OpFunctionCall:
3222 {
3223 if (length < 3)
3224 return false;
3225
3226 // Return value may be a temporary.
3227 if (compiler.get_type(args[0]).basetype != SPIRType::Void)
3228 notify_variable_access(args[1], current_block->self);
3229
3230 length -= 3;
3231 args += 3;
3232
3233 for (uint32_t i = 0; i < length; i++)
3234 {
3235 auto *var = compiler.maybe_get_backing_variable(args[i]);
3236 if (var)
3237 {
3238 accessed_variables_to_block[var->self].insert(current_block->self);
3239 // Assume we can get partial writes to this variable.
3240 partial_write_variables_to_block[var->self].insert(current_block->self);
3241 }
3242
3243 // Cannot easily prove if argument we pass to a function is completely written.
3244 // Usually, functions write to a dummy variable,
3245 // which is then copied to in full to the real argument.
3246
3247 // Might try to copy a Phi variable here.
3248 notify_variable_access(args[i], current_block->self);
3249 }
3250 break;
3251 }

	case OpSelect:
	{
		// In case of variable pointers, we might access a variable here.
		// We cannot prove anything about these accesses, however.
		for (uint32_t i = 1; i < length; i++)
		{
			if (i >= 3)
			{
				auto *var = compiler.maybe_get_backing_variable(args[i]);
				if (var)
				{
					accessed_variables_to_block[var->self].insert(current_block->self);
					// Assume we can get partial writes to this variable.
					partial_write_variables_to_block[var->self].insert(current_block->self);
				}
			}

			// Might try to copy a Phi variable here.
			notify_variable_access(args[i], current_block->self);
		}
		break;
	}

	case OpExtInst:
	{
		for (uint32_t i = 4; i < length; i++)
			notify_variable_access(args[i], current_block->self);
		notify_variable_access(args[1], current_block->self);
		break;
	}

	case OpArrayLength:
		// Only the result is a temporary.
		notify_variable_access(args[1], current_block->self);
		break;

	case OpLine:
	case OpNoLine:
		// Uses literals, but cannot be a phi variable or temporary, so ignore.
		break;

	// Atomics shouldn't be able to access function-local variables.
	// Some GLSL builtins access a pointer.

	case OpCompositeInsert:
	case OpVectorShuffle:
		// Specialize for opcodes which contain literals.
		for (uint32_t i = 1; i < 4; i++)
			notify_variable_access(args[i], current_block->self);
		break;

	case OpCompositeExtract:
		// Specialize for opcodes which contain literals.
		for (uint32_t i = 1; i < 3; i++)
			notify_variable_access(args[i], current_block->self);
		break;

	case OpImageWrite:
		for (uint32_t i = 0; i < length; i++)
		{
			// Argument 3 is a literal.
			if (i != 3)
				notify_variable_access(args[i], current_block->self);
		}
		break;

	case OpImageSampleImplicitLod:
	case OpImageSampleExplicitLod:
	case OpImageSparseSampleImplicitLod:
	case OpImageSparseSampleExplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSparseSampleProjImplicitLod:
	case OpImageSparseSampleProjExplicitLod:
	case OpImageFetch:
	case OpImageSparseFetch:
	case OpImageRead:
	case OpImageSparseRead:
		for (uint32_t i = 1; i < length; i++)
		{
			// Argument 4 is a literal.
			if (i != 4)
				notify_variable_access(args[i], current_block->self);
		}
		break;

	case OpImageSampleDrefImplicitLod:
	case OpImageSampleDrefExplicitLod:
	case OpImageSparseSampleDrefImplicitLod:
	case OpImageSparseSampleDrefExplicitLod:
	case OpImageSampleProjDrefImplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageSparseSampleProjDrefImplicitLod:
	case OpImageSparseSampleProjDrefExplicitLod:
	case OpImageGather:
	case OpImageSparseGather:
	case OpImageDrefGather:
	case OpImageSparseDrefGather:
		for (uint32_t i = 1; i < length; i++)
		{
			// Argument 5 is a literal.
			if (i != 5)
				notify_variable_access(args[i], current_block->self);
		}
		break;

	default:
	{
		// Rather dirty way of figuring out where Phi variables are used.
		// As long as only IDs are used, we can scan through instructions and try to find any evidence that
		// the ID of a variable has been used.
		// There are potential false positives here where a literal is used in place of an ID,
		// but worst case, it does not affect the correctness of the compile.
		// Exhaustive analysis would be better here, but it's not worth it for now.
		for (uint32_t i = 0; i < length; i++)
			notify_variable_access(args[i], current_block->self);
		break;
	}
	}
	return true;
}

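// StaticExpressionAccessHandler tries to prove that a variable is written exactly once,
// with a constant, before anything reads it. A minimal sketch of the pattern it accepts,
// in hypothetical SPIR-V assembly:
//   %lut = OpVariable %_ptr_Function_arr Function
//          OpStore %lut %constant_composite   ; the single, constant store
//   %p   = OpAccessChain %ptr %lut %index     ; reads may only follow the store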
Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_)
    : compiler(compiler_)
    , variable_id(variable_id_)
{
}

bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &)
{
	return false;
}

bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
{
	switch (op)
	{
	case OpStore:
		if (length < 2)
			return false;
		if (args[0] == variable_id)
		{
			static_expression = args[1];
			write_count++;
		}
		break;

	case OpLoad:
		if (length < 3)
			return false;
		if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized.
			return false;
		break;

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
		if (length < 3)
			return false;
		if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail.
			return false;
		break;

	default:
		break;
	}

	return true;
}

void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler,
                                        bool single_function)
{
	auto &cfg = *function_cfgs.find(entry.self)->second;

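	// A typical LUT candidate, in hypothetical GLSL source, looks like:
	//   float lut[4] = float[](1.0, 2.0, 3.0, 4.0); // written exactly once, with a constant
	//   ... = lut[index];                            // only ever read afterwards
	// Such arrays can be emitted as constant lookup tables in the output language.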
	// For each variable which is statically accessed.
	for (auto &accessed_var : handler.accessed_variables_to_block)
	{
		auto &blocks = accessed_var.second;
		auto &var = get<SPIRVariable>(accessed_var.first);
		auto &type = expression_type(accessed_var.first);

		// Only consider function local variables here.
		// If we only have a single function in our CFG, private storage is also fine,
		// since it behaves like a function local variable.
		bool allow_lut = var.storage == StorageClassFunction || (single_function && var.storage == StorageClassPrivate);
		if (!allow_lut)
			continue;

		// We cannot be a phi variable.
		if (var.phi_variable)
			continue;

		// Only consider arrays here.
		if (type.array.empty())
			continue;

		// If the variable has an initializer, make sure it is a constant expression.
		uint32_t static_constant_expression = 0;
		if (var.initializer)
		{
			if (ir.ids[var.initializer].get_type() != TypeConstant)
				continue;
			static_constant_expression = var.initializer;

			// There must be no stores to the variable; any write means this is not a LUT.
			if (handler.complete_write_variables_to_block.count(var.self) != 0 ||
			    handler.partial_write_variables_to_block.count(var.self) != 0)
				continue;
		}
		else
		{
			// We can have one, and only one, write to the variable, and that write needs to be a constant.

			// No partial writes allowed.
			if (handler.partial_write_variables_to_block.count(var.self) != 0)
				continue;

			auto itr = handler.complete_write_variables_to_block.find(var.self);

			// No writes?
			if (itr == end(handler.complete_write_variables_to_block))
				continue;

			// If we write to the variable in more than one block, we cannot pin down a single static expression.
			auto &write_blocks = itr->second;
			if (write_blocks.size() != 1)
				continue;

			// The write needs to happen in the dominating block.
			DominatorBuilder builder(cfg);
			for (auto &block : blocks)
				builder.add_block(block);
			uint32_t dominator = builder.get_dominator();

			// The complete write happened in a branch or similar, so we cannot deduce a static expression.
			if (write_blocks.count(dominator) == 0)
				continue;

			// Find the static expression for this variable.
			StaticExpressionAccessHandler static_expression_handler(*this, var.self);
			traverse_all_reachable_opcodes(get<SPIRBlock>(dominator), static_expression_handler);

			// We want one, and exactly one, write.
			if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0)
				continue;

			// Is it a constant expression?
			if (ir.ids[static_expression_handler.static_expression].get_type() != TypeConstant)
				continue;

			// We found a LUT!
			static_constant_expression = static_expression_handler.static_expression;
		}

		get<SPIRConstant>(static_constant_expression).is_used_as_lut = true;
		var.static_expression = static_constant_expression;
		var.statically_assigned = true;
		var.remapped_variable = true;
	}
}

void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler)
{
	// First, we map out all variable access within a function.
	// Essentially a map of block -> { variables accessed in the basic block }
	traverse_all_reachable_opcodes(entry, handler);

	auto &cfg = *function_cfgs.find(entry.self)->second;

	// Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier.
	analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block,
	                               handler.complete_write_variables_to_block);

	unordered_map<uint32_t, uint32_t> potential_loop_variables;
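	// A "loop variable" here is the induction variable of a classic for-loop, e.g. in
	// hypothetical GLSL: for (int i = 0; i < n; i++) { ... }
	// To qualify, it must be initialized on a branch-free path before the loop header,
	// be written in the continue block, and never be accessed after the loop merge.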

	// Find the loop dominator block for each block.
	for (auto &block_id : entry.blocks)
	{
		auto &block = get<SPIRBlock>(block_id);

		auto itr = ir.continue_block_to_loop_header.find(block_id);
		if (itr != end(ir.continue_block_to_loop_header) && itr->second != block_id)
		{
			// The continue block might be unreachable in the CFG, but we would still like to know the loop dominator.
			// The edge case is when the continue block is also the loop header; don't set the dominator in that case.
			block.loop_dominator = itr->second;
		}
		else
		{
			uint32_t loop_dominator = cfg.find_loop_dominator(block_id);
			if (loop_dominator != block_id)
				block.loop_dominator = loop_dominator;
			else
				block.loop_dominator = SPIRBlock::NoDominator;
		}
	}

	// For each variable which is statically accessed.
	for (auto &var : handler.accessed_variables_to_block)
	{
		// Only deal with variables which are considered local variables in this function.
		if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) ==
		    end(entry.local_variables))
			continue;

		DominatorBuilder builder(cfg);
		auto &blocks = var.second;
		auto &type = expression_type(var.first);

		// Figure out which block dominates all accesses of this variable.
		for (auto &block : blocks)
		{
			// If we're accessing a variable inside a continue block, this variable might be a loop variable.
			// We can only use loop variables with scalars, as we cannot track static expressions for vectors.
			if (is_continue(block))
			{
				// Potentially awkward case to check for.
				// We might have a variable inside a loop, which is touched by the continue block,
				// but is not actually a loop variable.
				// The continue block is dominated by the inner part of the loop, which does not make sense in high-level
				// language output because it will be declared before the body,
				// so we will have to lift the dominator up to the relevant loop header instead.
				builder.add_block(ir.continue_block_to_loop_header[block]);

				// Arrays or structs cannot be loop variables.
				if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty())
				{
					// If the variable is used in multiple continue blocks, it is not a loop
					// candidate; signal that by setting the block to ~0u.
					auto &potential = potential_loop_variables[var.first];

					if (potential == 0)
						potential = block;
					else
						potential = ~(0u);
				}
			}
			builder.add_block(block);
		}

		builder.lift_continue_block_dominator();

		// Add it to a per-block list of variables.
		BlockID dominating_block = builder.get_dominator();

		// For variables whose dominating block is inside a loop, there is a risk that these variables
		// actually need to be preserved across loop iterations. We can express this by adding
		// a "read" access to the loop header.
		// In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable.
		// Should that fail, we look for the outermost loop header and tack on an access there.
		// Phi nodes cannot have this problem.
		if (dominating_block)
		{
			auto &variable = get<SPIRVariable>(var.first);
			if (!variable.phi_variable)
			{
				auto *block = &get<SPIRBlock>(dominating_block);
				bool preserve = may_read_undefined_variable_in_block(*block, var.first);
				if (preserve)
				{
					// Find the outermost loop scope.
					while (block->loop_dominator != BlockID(SPIRBlock::NoDominator))
						block = &get<SPIRBlock>(block->loop_dominator);

					if (block->self != dominating_block)
					{
						builder.add_block(block->self);
						dominating_block = builder.get_dominator();
					}
				}
			}
		}

		// If all blocks here are dead code, this will be 0, so the variable in question
		// will be completely eliminated.
		if (dominating_block)
		{
			auto &block = get<SPIRBlock>(dominating_block);
			block.dominated_variables.push_back(var.first);
			get<SPIRVariable>(var.first).dominator = dominating_block;
		}
	}

	for (auto &var : handler.accessed_temporaries_to_block)
	{
		auto itr = handler.result_id_to_type.find(var.first);

		if (itr == end(handler.result_id_to_type))
		{
			// We found a false positive ID being used, ignore.
			// This should probably be an assert.
			continue;
		}

		// There is no point in doing domination analysis for opaque types.
		auto &type = get<SPIRType>(itr->second);
		if (type_is_opaque_value(type))
			continue;

		DominatorBuilder builder(cfg);
		bool force_temporary = false;
		bool used_in_header_hoisted_continue_block = false;

		// Figure out which block dominates all accesses of this temporary.
		auto &blocks = var.second;
		for (auto &block : blocks)
		{
			builder.add_block(block);

			if (blocks.size() != 1 && is_continue(block))
			{
				// The risk here is that an inner loop can dominate the continue block.
				// Any temporary we access in the continue block must be declared before the loop.
				// This is moot for complex loops, however.
				auto &loop_header_block = get<SPIRBlock>(ir.continue_block_to_loop_header[block]);
				assert(loop_header_block.merge == SPIRBlock::MergeLoop);
				builder.add_block(loop_header_block.self);
				used_in_header_hoisted_continue_block = true;
			}
		}

		uint32_t dominating_block = builder.get_dominator();

		if (blocks.size() != 1 && is_single_block_loop(dominating_block))
		{
			// Awkward case, because the loop header is also the continue block,
			// so hoisting to the loop header does not help.
			force_temporary = true;
		}

		if (dominating_block)
		{
			// If we touch a variable in the dominating block, this is the expected setup.
			// SPIR-V normally mandates this, but we have extra cases for temporary use inside loops.
			bool first_use_is_dominator = blocks.count(dominating_block) != 0;

			if (!first_use_is_dominator || force_temporary)
			{
				if (handler.access_chain_expressions.count(var.first))
				{
					// Exceptionally rare case.
					// We cannot declare temporaries of access chains (except on MSL perhaps with pointers).
					// Rather than do that, we force the indexing expressions to be declared in the right scope by
					// tracking their usage to that end. There is no temporary to hoist.
					// However, we still need to observe declaration order of the access chain.

					if (used_in_header_hoisted_continue_block)
					{
						// For this scenario, we used an access chain inside a continue block where we also registered an access to the header block.
						// This is a problem as we need to declare an access chain properly first with full definition.
						// We cannot use temporaries for these expressions,
						// so we must make sure the access chain is declared ahead of time.
						// Force a complex for loop to deal with this.
						// TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option.
						auto &loop_header_block = get<SPIRBlock>(dominating_block);
						assert(loop_header_block.merge == SPIRBlock::MergeLoop);
						loop_header_block.complex_continue = true;
					}
				}
				else
				{
					// This should be very rare, but if we try to declare a temporary inside a loop,
					// and that temporary is used outside the loop as well (the spirv-opt inliner likes this),
					// we should actually emit the temporary outside the loop.
					hoisted_temporaries.insert(var.first);
					forced_temporaries.insert(var.first);

					auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary;
					block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
				}
			}
			else if (blocks.size() > 1)
			{
				// Keep track of the temporary as we might have to declare this temporary.
				// This can happen if the loop header dominates a temporary, but we have a complex fallback loop.
				// In this case, the header is actually inside the for (;;) {} block, and we have problems.
				// What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block
				// declares the temporary.
				auto &block_temporaries = get<SPIRBlock>(dominating_block).potential_declare_temporary;
				block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
			}
		}
	}

	unordered_set<uint32_t> seen_blocks;

	// Now, try to analyze whether or not these variables are actually loop variables.
	for (auto &loop_variable : potential_loop_variables)
	{
		auto &var = get<SPIRVariable>(loop_variable.first);
		auto dominator = var.dominator;
		BlockID block = loop_variable.second;

		// The variable was accessed in multiple continue blocks, ignore.
		if (block == BlockID(~(0u)) || block == BlockID(0))
			continue;

		// Dead code.
		if (dominator == ID(0))
			continue;

		BlockID header = 0;

		// Find the loop header for this block if we are a continue block.
		{
			auto itr = ir.continue_block_to_loop_header.find(block);
			if (itr != end(ir.continue_block_to_loop_header))
			{
				header = itr->second;
			}
			else if (get<SPIRBlock>(block).continue_block == block)
			{
				// Also check for a self-referential continue block.
				header = block;
			}
		}

		assert(header);
		auto &header_block = get<SPIRBlock>(header);
		auto &blocks = handler.accessed_variables_to_block[loop_variable.first];

		// If a loop variable is not used before the loop, it's probably not a loop variable.
		bool has_accessed_variable = blocks.count(header) != 0;

		// Now, there are two conditions we need to meet for the variable to be a loop variable.
		// 1. The dominating block must have a branch-free path to the loop header;
		// this way we statically know which expression should be part of the loop variable initializer.

		// Walk from the dominator; if there is one straight edge connecting
		// the dominator and loop header, we statically know the loop initializer.
		bool static_loop_init = true;
		while (dominator != header)
		{
			if (blocks.count(dominator) != 0)
				has_accessed_variable = true;

			auto &succ = cfg.get_succeeding_edges(dominator);
			if (succ.size() != 1)
			{
				static_loop_init = false;
				break;
			}

			auto &pred = cfg.get_preceding_edges(succ.front());
			if (pred.size() != 1 || pred.front() != dominator)
			{
				static_loop_init = false;
				break;
			}

			dominator = succ.front();
		}

		if (!static_loop_init || !has_accessed_variable)
			continue;

		// The second condition we need to meet is that no access after the loop
		// merge can occur. Walk the CFG to see if we find anything.

		seen_blocks.clear();
		cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool {
			// We found a block which accesses the variable outside the loop.
			if (blocks.find(walk_block) != end(blocks))
				static_loop_init = false;
			return true;
		});

		if (!static_loop_init)
			continue;

		// We have a loop variable.
		header_block.loop_variables.push_back(loop_variable.first);
		// Need to sort here as variables come from an unordered container, and pushing them in the wrong order
		// will break reproducibility in regression runs.
		sort(begin(header_block.loop_variables), end(header_block.loop_variables));
		get<SPIRVariable>(loop_variable.first).loop_variable = true;
	}
}

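// Returns whether the block might read the variable before writing it. A minimal sketch of
// the distinction, in hypothetical GLSL:
//   { v = 10; x = v; }  // first access is a complete write -> no preservation needed
//   { x = v; v = 10; }  // first access is a read -> must be preserved across iterations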
bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var)
{
	for (auto &op : block.ops)
	{
		auto *ops = stream(op);
		switch (op.op)
		{
		case OpStore:
		case OpCopyMemory:
			if (ops[0] == var)
				return false;
			break;

		case OpAccessChain:
		case OpInBoundsAccessChain:
		case OpPtrAccessChain:
			// Access chains are generally used to partially read and write. It's too hard to analyze
			// if all constituents are written fully before continuing, so just assume it's preserved.
			// This is the same as the parameter preservation analysis.
			if (ops[2] == var)
				return true;
			break;

		case OpSelect:
			// Variable pointers.
			// We might read before writing.
			if (ops[3] == var || ops[4] == var)
				return true;
			break;

		case OpPhi:
		{
			// Variable pointers.
			// We might read before writing.
			if (op.length < 2)
				break;

			uint32_t count = op.length - 2;
			for (uint32_t i = 0; i < count; i += 2)
				if (ops[i + 2] == var)
					return true;
			break;
		}

		case OpCopyObject:
		case OpLoad:
			if (ops[2] == var)
				return true;
			break;

		case OpFunctionCall:
		{
			if (op.length < 3)
				break;

			// May read before writing.
			uint32_t count = op.length - 3;
			for (uint32_t i = 0; i < count; i++)
				if (ops[i + 3] == var)
					return true;
			break;
		}

		default:
			break;
		}
	}

	// The variable was not accessed here, at least not in any usual fashion.
	// It's likely accessed in a branch instead, so assume we must preserve it.
	return true;
}

Bitset Compiler::get_buffer_block_flags(VariableID id) const
{
	return ir.get_buffer_block_flags(get<SPIRVariable>(id));
}

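// Reports whether all (nested) members of a type share one basic type. For example, in
// hypothetical GLSL, struct S { float a; vec3 b; } yields base_type = Float and returns
// true, while struct T { float a; int b; } returns false.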
bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
{
	if (type.basetype == SPIRType::Struct)
	{
		base_type = SPIRType::Unknown;
		for (auto &member_type : type.member_types)
		{
			SPIRType::BaseType member_base;
			if (!get_common_basic_type(get<SPIRType>(member_type), member_base))
				return false;

			if (base_type == SPIRType::Unknown)
				base_type = member_base;
			else if (base_type != member_base)
				return false;
		}
		return true;
	}
	else
	{
		base_type = type.basetype;
		return true;
	}
}

void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin,
                                                    const Bitset &decoration_flags)
{
	// If used, we will need to explicitly declare a new array size for these builtins.
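	// In hypothetical GLSL output this corresponds to redeclaring, e.g.:
	//   out float gl_ClipDistance[2];
	// with the exact array size the shader actually uses.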

	if (builtin == BuiltInClipDistance)
	{
		if (!type.array_size_literal[0])
			SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal.");
		uint32_t array_size = type.array[0];
		if (array_size == 0)
			SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized.");
		compiler.clip_distance_count = array_size;
	}
	else if (builtin == BuiltInCullDistance)
	{
		if (!type.array_size_literal[0])
			SPIRV_CROSS_THROW("Array size for CullDistance must be a literal.");
		uint32_t array_size = type.array[0];
		if (array_size == 0)
			SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized.");
		compiler.cull_distance_count = array_size;
	}
	else if (builtin == BuiltInPosition)
	{
		if (decoration_flags.get(DecorationInvariant))
			compiler.position_invariant = true;
	}
}

void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks)
{
	// Only handle plain variables here.
	// Builtins which are part of a block are handled in AccessChain.
	// If allow_blocks is used however, this is to handle initializers of blocks,
	// which implies that all members are written to.

	auto *var = compiler.maybe_get<SPIRVariable>(id);
	auto *m = compiler.ir.find_meta(id);
	if (var && m)
	{
		auto &type = compiler.get<SPIRType>(var->basetype);
		auto &decorations = m->decoration;
		auto &flags = type.storage == StorageClassInput ?
		              compiler.active_input_builtins : compiler.active_output_builtins;
		if (decorations.builtin)
		{
			flags.set(decorations.builtin_type);
			handle_builtin(type, decorations.builtin_type, decorations.decoration_flags);
		}
		else if (allow_blocks && compiler.has_decoration(type.self, DecorationBlock))
		{
			uint32_t member_count = uint32_t(type.member_types.size());
			for (uint32_t i = 0; i < member_count; i++)
			{
				if (compiler.has_member_decoration(type.self, i, DecorationBuiltIn))
				{
					auto &member_type = compiler.get<SPIRType>(type.member_types[i]);
					BuiltIn builtin = BuiltIn(compiler.get_member_decoration(type.self, i, DecorationBuiltIn));
					flags.set(builtin);
					handle_builtin(member_type, builtin, compiler.get_member_decoration_bitset(type.self, i));
				}
			}
		}
	}
}

void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id)
{
	add_if_builtin(id, false);
}

void Compiler::ActiveBuiltinHandler::add_if_builtin_or_block(uint32_t id)
{
	add_if_builtin(id, true);
}

bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
{
	switch (opcode)
	{
	case OpStore:
		if (length < 1)
			return false;

		add_if_builtin(args[0]);
		break;

	case OpCopyMemory:
		if (length < 2)
			return false;

		add_if_builtin(args[0]);
		add_if_builtin(args[1]);
		break;

	case OpCopyObject:
	case OpLoad:
		if (length < 3)
			return false;

		add_if_builtin(args[2]);
		break;

	case OpSelect:
		if (length < 5)
			return false;

		add_if_builtin(args[3]);
		add_if_builtin(args[4]);
		break;

	case OpPhi:
	{
		if (length < 2)
			return false;

		uint32_t count = length - 2;
		args += 2;
		for (uint32_t i = 0; i < count; i += 2)
			add_if_builtin(args[i]);
		break;
	}

	case OpFunctionCall:
	{
		if (length < 3)
			return false;

		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
			add_if_builtin(args[i]);
		break;
	}

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 4)
			return false;

		// Only consider global variables; we cannot consider function-local variables
		// or other access chains here, since they have not been created yet at this point.
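		// An example of what this case catches, in hypothetical GLSL: the builtin lives
		// inside a block and is reached through an access chain rather than directly:
		//   gl_out[gl_InvocationID].gl_Position = ...;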
		auto *var = compiler.maybe_get<SPIRVariable>(args[2]);
		if (!var)
			break;

		// Required if we access chain into builtins like gl_GlobalInvocationID.
		add_if_builtin(args[2]);

		// Start traversing type hierarchy at the proper non-pointer types.
		auto *type = &compiler.get_variable_data_type(*var);

		auto &flags =
		    var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;

		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
		{
			// Pointers
			if (opcode == OpPtrAccessChain && i == 0)
			{
				type = &compiler.get<SPIRType>(type->parent_type);
				continue;
			}

			// Arrays
			if (!type->array.empty())
			{
				type = &compiler.get<SPIRType>(type->parent_type);
			}
			// Structs
			else if (type->basetype == SPIRType::Struct)
			{
				uint32_t index = compiler.get<SPIRConstant>(args[i]).scalar();

				if (index < uint32_t(compiler.ir.meta[type->self].members.size()))
				{
					auto &decorations = compiler.ir.meta[type->self].members[index];
					if (decorations.builtin)
					{
						flags.set(decorations.builtin_type);
						handle_builtin(compiler.get<SPIRType>(type->member_types[index]), decorations.builtin_type,
						               decorations.decoration_flags);
					}
				}

				type = &compiler.get<SPIRType>(type->member_types[index]);
			}
			else
			{
				// No point in traversing further. We won't find any extra builtins.
				break;
			}
		}
		break;
	}

	default:
		break;
	}

	return true;
}


void Compiler::update_active_builtins()
{
	active_input_builtins.reset();
	active_output_builtins.reset();
	cull_distance_count = 0;
	clip_distance_count = 0;
	ActiveBuiltinHandler handler(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage != StorageClassOutput)
			return;
		if (!interface_variable_exists_in_entry_point(var.self))
			return;

		// Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
		if (var.initializer != ID(0))
			handler.add_if_builtin_or_block(var.self);
	});
}

// Returns whether this shader uses a builtin of the given storage class.
bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const
{
	const Bitset *flags;
	switch (storage)
	{
	case StorageClassInput:
		flags = &active_input_builtins;
		break;
	case StorageClassOutput:
		flags = &active_output_builtins;
		break;

	default:
		return false;
	}
	return flags->get(builtin);
}

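// Comparison ("depth") state matters for backends like MSL and HLSL, where shadow lookups
// need a dedicated sampler type. A hypothetical GLSL input that produces such state:
//   uniform sampler2DShadow shadow_map;
//   float s = texture(shadow_map, vec3(uv, ref));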
void Compiler::analyze_image_and_sampler_usage()
{
	CombinedImageSamplerDrefHandler dref_handler(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), dref_handler);

	CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

	// Need to run this traversal twice. The first time, we propagate any comparison sampler usage from leaf functions
	// down to main().
	// In the second pass, we can propagate forced depth state coming from main() up into leaf functions.
	handler.dependency_hierarchy.clear();
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

	comparison_ids = move(handler.comparison_ids);
	need_subpass_input = handler.need_subpass_input;

	// Forward information from separate images and samplers into combined image samplers.
	for (auto &combined : combined_image_samplers)
		if (comparison_ids.count(combined.sampler_id))
			comparison_ids.insert(combined.combined_id);
}

bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t)
{
	// Mark all sampled images which are used with Dref.
	switch (opcode)
	{
	case OpImageSampleDrefExplicitLod:
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageSampleProjDrefImplicitLod:
	case OpImageSparseSampleProjDrefImplicitLod:
	case OpImageSparseSampleDrefImplicitLod:
	case OpImageSparseSampleProjDrefExplicitLod:
	case OpImageSparseSampleDrefExplicitLod:
	case OpImageDrefGather:
	case OpImageSparseDrefGather:
		dref_combined_samplers.insert(args[2]);
		return true;

	default:
		break;
	}

	return true;
}

const CFG &Compiler::get_cfg_for_current_function() const
{
	assert(current_function);
	return get_cfg_for_function(current_function->self);
}

const CFG &Compiler::get_cfg_for_function(uint32_t id) const
{
	auto cfg_itr = function_cfgs.find(id);
	assert(cfg_itr != end(function_cfgs));
	assert(cfg_itr->second);
	return *cfg_itr->second;
}

void Compiler::build_function_control_flow_graphs_and_analyze()
{
	CFGBuilder handler(*this);
	handler.function_cfgs[ir.default_entry_point].reset(new CFG(*this, get<SPIRFunction>(ir.default_entry_point)));
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
	function_cfgs = move(handler.function_cfgs);
	bool single_function = function_cfgs.size() <= 1;

	for (auto &f : function_cfgs)
	{
		auto &func = get<SPIRFunction>(f.first);
		AnalyzeVariableScopeAccessHandler scope_handler(*this, func);
		analyze_variable_scope(func, scope_handler);
		find_function_local_luts(func, scope_handler, single_function);

		// Check if we can actually use the loop variables we found in analyze_variable_scope.
		// To use multiple initializers, we need the same type and qualifiers.
		for (auto block : func.blocks)
		{
			auto &b = get<SPIRBlock>(block);
			if (b.loop_variables.size() < 2)
				continue;

			auto &flags = get_decoration_bitset(b.loop_variables.front());
			uint32_t type = get<SPIRVariable>(b.loop_variables.front()).basetype;
			bool invalid_initializers = false;
			for (auto loop_variable : b.loop_variables)
			{
				if (flags != get_decoration_bitset(loop_variable) ||
				    type != get<SPIRVariable>(loop_variable).basetype)
				{
					invalid_initializers = true;
					break;
				}
			}

			if (invalid_initializers)
			{
				for (auto loop_variable : b.loop_variables)
					get<SPIRVariable>(loop_variable).loop_variable = false;
				b.loop_variables.clear();
			}
		}
	}
}

Compiler::CFGBuilder::CFGBuilder(Compiler &compiler_)
    : compiler(compiler_)
{
}

bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t)
{
	return true;
}

bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func)
{
	if (function_cfgs.find(func.self) == end(function_cfgs))
	{
		function_cfgs[func.self].reset(new CFG(compiler, func));
		return true;
	}
	else
		return false;
}

void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src)
{
	dependency_hierarchy[dst].insert(src);
	// Propagate up any comparison state if we're loading from one such variable.
	if (comparison_ids.count(src))
		comparison_ids.insert(dst);
}
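// Dependency edges are per-ID: loading a sampler into a temporary records the temporary
// as depending on the sampler variable, so comparison state discovered later can be
// propagated along the whole chain.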

bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
	if (length < 3)
		return false;

	auto &func = compiler.get<SPIRFunction>(args[2]);
	const auto *arg = &args[3];
	length -= 3;

	for (uint32_t i = 0; i < length; i++)
	{
		auto &argument = func.arguments[i];
		add_dependency(argument.id, arg[i]);
	}

	return true;
}

void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id)
{
	// Traverse the variable dependency hierarchy and tag everything in its path with comparison ids.
	comparison_ids.insert(id);

	for (auto &dep_id : dependency_hierarchy[id])
		add_hierarchy_to_comparison_ids(dep_id);
}

bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	switch (opcode)
	{
	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	case OpLoad:
	{
		if (length < 3)
			return false;

		add_dependency(args[1], args[2]);

		// Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
		// If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
		auto &type = compiler.get<SPIRType>(args[0]);
		if (type.image.dim == DimSubpassData)
			need_subpass_input = true;

		// If we load a SampledImage and it will be used with Dref, propagate the state up.
		if (dref_combined_samplers.count(args[1]) != 0)
			add_hierarchy_to_comparison_ids(args[1]);
		break;
	}

	case OpSampledImage:
	{
		if (length < 4)
			return false;

		uint32_t result_type = args[0];
		uint32_t result_id = args[1];
		auto &type = compiler.get<SPIRType>(result_type);

		// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
		// This image must be a depth image.
		uint32_t image = args[2];
		uint32_t sampler = args[3];

		if (type.image.depth || dref_combined_samplers.count(result_id) != 0)
		{
			add_hierarchy_to_comparison_ids(image);

			// This sampler must be a SamplerComparisonState, and not a regular SamplerState.
			add_hierarchy_to_comparison_ids(sampler);

			// Mark the OpSampledImage itself as being comparison state.
			comparison_ids.insert(result_id);
		}
		return true;
	}

	default:
		break;
	}

	return true;
}

bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const
{
	auto *m = ir.find_meta(id);
	return m && m->hlsl_is_magic_counter_buffer;
}

bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const
{
	auto *m = ir.find_meta(id);

	// First, check for the proper decoration.
	if (m && m->hlsl_magic_counter_buffer != 0)
	{
		counter_id = m->hlsl_magic_counter_buffer;
		return true;
	}
	else
		return false;
}

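// Lowers OpConstantNull into regular constants, recursively. For example, a null float[2]
// becomes one null float constant referenced twice by a composite array constant; structs
// null each member the same way.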
void Compiler::make_constant_null(uint32_t id, uint32_t type)
{
	auto &constant_type = get<SPIRType>(type);

	if (constant_type.pointer)
	{
		auto &constant = set<SPIRConstant>(id, type);
		constant.make_null(constant_type);
	}
	else if (!constant_type.array.empty())
	{
		assert(constant_type.parent_type);
		uint32_t parent_id = ir.increase_bound_by(1);
		make_constant_null(parent_id, constant_type.parent_type);

		if (!constant_type.array_size_literal.back())
			SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");

		SmallVector<uint32_t> elements(constant_type.array.back());
		for (uint32_t i = 0; i < constant_type.array.back(); i++)
			elements[i] = parent_id;
		set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
	}
	else if (!constant_type.member_types.empty())
	{
		uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size()));
		SmallVector<uint32_t> elements(constant_type.member_types.size());
		for (uint32_t i = 0; i < constant_type.member_types.size(); i++)
		{
			make_constant_null(member_ids + i, constant_type.member_types[i]);
			elements[i] = member_ids + i;
		}
		set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
	}
	else
	{
		auto &constant = set<SPIRConstant>(id, type);
		constant.make_null(constant_type);
	}
}

const SmallVector<spv::Capability> &Compiler::get_declared_capabilities() const
{
	return ir.declared_capabilities;
}

const SmallVector<std::string> &Compiler::get_declared_extensions() const
{
	return ir.declared_extensions;
}

std::string Compiler::get_remapped_declared_block_name(VariableID id) const
{
	return get_remapped_declared_block_name(id, false);
}

std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const
{
	auto itr = declared_block_names.find(id);
	if (itr != end(declared_block_names))
	{
		return itr->second;
	}
	else
	{
		auto &var = get<SPIRVariable>(id);

		if (fallback_prefer_instance_name)
		{
			return to_name(var.self);
		}
		else
		{
			auto &type = get<SPIRType>(var.basetype);
			auto *type_meta = ir.find_meta(type.self);
			auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
			return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
		}
	}
}

bool Compiler::reflection_ssbo_instance_name_is_significant() const
{
	if (ir.source.known)
	{
		// UAVs from HLSL source tend to be declared in a way where the type is reused
		// but the instance name is significant, and that's the name we should report.
		// For GLSL, SSBOs each have their own block type as that's how GLSL is written.
		return ir.source.hlsl;
	}

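	// The HLSL-style pattern the heuristic below looks for, in hypothetical HLSL:
	//   RWStructuredBuffer<T> a : register(u0);
	//   RWStructuredBuffer<T> b : register(u1);
	// Both UAVs share one block type, so only the instance names can tell them apart.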
	unordered_set<uint32_t> ssbo_type_ids;
	bool aliased_ssbo_types = false;

	// If we don't have any OpSource information, we need to perform some shaky heuristics.
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		if (!type.pointer || var.storage == StorageClassFunction)
			return;

		bool ssbo = var.storage == StorageClassStorageBuffer ||
		            (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock));

		if (ssbo)
		{
			if (ssbo_type_ids.count(type.self))
				aliased_ssbo_types = true;
			else
				ssbo_type_ids.insert(type.self);
		}
	});

	// If the block name is aliased, assume we have HLSL-style UAV declarations.
	return aliased_ssbo_types;
}

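// Most SPIR-V instructions lay out their operand words as:
//   <result-type-id> <result-id> <operand...>
// e.g. for an integer add, args holds [%int, %result, %a, %b], so result_type = %int and
// result_id = %result. The opcodes listed below have no result, or use a different layout.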
bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args,
                                          uint32_t length)
{
	// Most instructions follow the pattern of <result-type> <result-id> <arguments>.
	// There are some exceptions.
	switch (op)
	{
	case OpStore:
	case OpCopyMemory:
	case OpCopyMemorySized:
	case OpImageWrite:
	case OpAtomicStore:
	case OpAtomicFlagClear:
	case OpEmitStreamVertex:
	case OpEndStreamPrimitive:
	case OpControlBarrier:
	case OpMemoryBarrier:
	case OpGroupWaitEvents:
	case OpRetainEvent:
	case OpReleaseEvent:
	case OpSetUserEventStatus:
	case OpCaptureEventProfilingInfo:
	case OpCommitReadPipe:
	case OpCommitWritePipe:
	case OpGroupCommitReadPipe:
	case OpGroupCommitWritePipe:
	case OpLine:
	case OpNoLine:
		return false;

	default:
		if (length > 1 && maybe_get<SPIRType>(args[0]) != nullptr)
		{
			result_type = args[0];
			result_id = args[1];
			return true;
		}
		else
			return false;
	}
}

Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const
{
	Bitset flags;
	auto *type_meta = ir.find_meta(type.self);

	if (type_meta)
	{
		auto &members = type_meta->members;
		if (index >= members.size())
			return flags;
		auto &dec = members[index];

		flags.merge_or(dec.decoration_flags);

		auto &member_type = get<SPIRType>(type.member_types[index]);

		// If our member type is a struct, traverse all of its child members recursively as well.
		auto &member_childs = member_type.member_types;
		for (uint32_t i = 0; i < member_childs.size(); i++)
		{
			auto &child_member_type = get<SPIRType>(member_childs[i]);
			if (!child_member_type.pointer)
				flags.merge_or(combined_decoration_for_member(member_type, i));
		}
	}

	return flags;
}

bool Compiler::is_desktop_only_format(spv::ImageFormat format)
{
	switch (format)
	{
	// Desktop-only formats.
	case ImageFormatR11fG11fB10f:
	case ImageFormatR16f:
	case ImageFormatRgb10A2:
	case ImageFormatR8:
	case ImageFormatRg8:
	case ImageFormatR16:
	case ImageFormatRg16:
	case ImageFormatRgba16:
	case ImageFormatR16Snorm:
	case ImageFormatRg16Snorm:
	case ImageFormatRgba16Snorm:
	case ImageFormatR8Snorm:
	case ImageFormatRg8Snorm:
	case ImageFormatR8ui:
	case ImageFormatRg8ui:
	case ImageFormatR16ui:
	case ImageFormatRgb10a2ui:
	case ImageFormatR8i:
	case ImageFormatRg8i:
	case ImageFormatR16i:
		return true;
	default:
		break;
	}

	return false;
}

bool Compiler::image_is_comparison(const SPIRType &type, uint32_t id) const
{
	return type.image.depth || (comparison_ids.count(id) != 0);
}

bool Compiler::type_is_opaque_value(const SPIRType &type) const
{
	return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
	                         type.basetype == SPIRType::Sampler);
}

// Make these member functions so we can easily break on any force_recompile events.
void Compiler::force_recompile()
{
	is_force_recompile = true;
}

bool Compiler::is_forcing_recompilation() const
{
	return is_force_recompile;
}

void Compiler::clear_force_recompile()
{
	is_force_recompile = false;
}

Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_)
    : compiler(compiler_)
{
}

bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t)
{
	if (op == OpConvertUToPtr || op == OpBitcast)
	{
		auto &type = compiler.get<SPIRType>(args[0]);
		if (type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer && type.pointer_depth == 1)
		{
			// If we need to cast to a pointer type which is not a block, we might need to synthesize
			// a block type ourselves which wraps this POD type.
			if (type.basetype != SPIRType::Struct)
				types.insert(args[0]);
		}
	}

	return true;
}

void Compiler::analyze_non_block_pointer_types()
{
	PhysicalStorageBufferPointerHandler handler(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
	physical_storage_non_block_pointer_types.reserve(handler.types.size());
	for (auto type : handler.types)
		physical_storage_non_block_pointer_types.push_back(type);
	sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
}

bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
{
	if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT)
	{
		if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
		{
			// This is the most complex case; we have no sensible way of dealing with it
			// other than taking the fully conservative approach, so exit early.
			split_function_case = true;
			return false;
		}
		else
		{
			interlock_function_id = call_stack.back();
			// If this call is performed inside control flow, we have a problem.
			auto &cfg = compiler.get_cfg_for_function(interlock_function_id);

			uint32_t from_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block;
			bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id);
			if (!outside_control_flow)
				control_flow_interlock = true;
		}
	}
	return true;
}

void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
{
	current_block_id = block.self;
}

bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
	if (length < 3)
		return false;
	call_stack.push_back(args[2]);
	return true;
}

bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
{
	call_stack.pop_back();
	return true;
}

bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
	if (length < 3)
		return false;

	if (args[2] == interlock_function_id)
		call_stack_is_interlocked = true;

	call_stack.push_back(args[2]);
	return true;
}

bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
{
	if (call_stack.back() == interlock_function_id)
		call_stack_is_interlocked = false;

	call_stack.pop_back();
	return true;
}

void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
{
	if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) ||
	    split_function_case)
	{
		compiler.interlocked_resources.insert(id);
	}
}

bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	// We only care about critical section analysis if we have the simple case.
	if (use_critical_section)
	{
		if (opcode == OpBeginInvocationInterlockEXT)
		{
			in_crit_sec = true;
			return true;
		}

		if (opcode == OpEndInvocationInterlockEXT)
		{
			// End of the critical section; nothing more to do.
			return false;
		}
	}

	// We need to figure out where images and buffers are loaded from, so do only the bare-bones analysis we need.
	switch (opcode)
	{
	case OpLoad:
	{
		if (length < 3)
			return false;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// We're only concerned with buffer and image memory here.
		if (!var)
			break;

		switch (var->storage)
		{
		default:
			break;

		case StorageClassUniformConstant:
		{
			uint32_t result_type = args[0];
			uint32_t id = args[1];
			compiler.set<SPIRExpression>(id, "", result_type, true);
			compiler.register_read(id, ptr, true);
			break;
		}

		case StorageClassUniform:
			// Must have BufferBlock; we only care about SSBOs.
			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
				break;
			// fallthrough
		case StorageClassStorageBuffer:
			access_potential_resource(var->self);
			break;
		}
		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
		    type.storage == StorageClassStorageBuffer)
		{
			uint32_t id = args[1];
			uint32_t ptr = args[2];
			compiler.set<SPIRExpression>(id, "", result_type, true);
			compiler.register_read(id, ptr, true);
			compiler.ir.ids[id].set_allow_type_rewrite();
		}
		break;
	}

	case OpImageTexelPointer:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];
		uint32_t id = args[1];
		uint32_t ptr = args[2];
		auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var)
			e.loaded_from = var->self;
		break;
	}

	case OpStore:
	case OpImageWrite:
	case OpAtomicStore:
	{
		if (length < 1)
			return false;

		uint32_t ptr = args[0];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
		            var->storage == StorageClassStorageBuffer))
		{
			access_potential_resource(var->self);
		}

		break;
	}

	case OpCopyMemory:
	{
		if (length < 2)
			return false;

		uint32_t dst = args[0];
		uint32_t src = args[1];
		auto *dst_var = compiler.maybe_get_backing_variable(dst);
		auto *src_var = compiler.maybe_get_backing_variable(src);

		if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
			access_potential_resource(dst_var->self);

		if (src_var)
		{
			if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
				break;

			if (src_var->storage == StorageClassUniform &&
			    !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
			{
				break;
			}

			access_potential_resource(src_var->self);
		}

		break;
	}

	case OpImageRead:
	case OpAtomicLoad:
	{
		if (length < 3)
			return false;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// We're only concerned with buffer and image memory here.
		if (!var)
			break;

		switch (var->storage)
		{
		default:
			break;

		case StorageClassUniform:
			// Must have BufferBlock; we only care about SSBOs.
			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
				break;
			// fallthrough
		case StorageClassUniformConstant:
		case StorageClassStorageBuffer:
			access_potential_resource(var->self);
			break;
		}
		break;
	}

	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	{
		if (length < 3)
			return false;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
		            var->storage == StorageClassStorageBuffer))
		{
			access_potential_resource(var->self);
		}

		break;
	}

	default:
		break;
	}

	return true;
}

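// Detects resources accessed inside a fragment shader interlock critical section.
// A hypothetical GLSL fragment using GL_ARB_fragment_shader_interlock:
//   beginInvocationInterlockARB();
//   imageStore(img, coord, v); // img becomes an interlocked resource
//   endInvocationInterlockARB();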
void Compiler::analyze_interlocked_resource_usage()
{
	if (get_execution_model() == ExecutionModelFragment &&
	    (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
	     get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
	     get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
	     get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
	{
		InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
		traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler);

		InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
		handler.interlock_function_id = prepass_handler.interlock_function_id;
		handler.split_function_case = prepass_handler.split_function_case;
		handler.control_flow_interlock = prepass_handler.control_flow_interlock;
		handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock;

		traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

		// For GLSL: if we hit any of these cases, we have to fall back to the conservative approach.
		interlocked_is_complex =
		    !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point;
	}
}

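// With buffer references (SPV_KHR_physical_storage_buffer) it is possible to declare arrays
// of pointers, e.g. in hypothetical GLSL:
//   layout(buffer_reference) buffer Ref { float v; };
//   Ref refs[4];
// Such an array type is itself a pointer whose parent type has the same pointer depth.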
bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
{
	if (!type.pointer)
		return false;

	// If the parent type has the same pointer depth, we must have an array of pointers.
	return type.pointer_depth == get<SPIRType>(type.parent_type).pointer_depth;
}

bool Compiler::type_is_top_level_physical_pointer(const SPIRType &type) const
{
	return type.pointer && type.storage == StorageClassPhysicalStorageBuffer &&
	       type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth;
}

bool Compiler::flush_phi_required(BlockID from, BlockID to) const
{
	auto &child = get<SPIRBlock>(to);
	for (auto &phi : child.phi_variables)
		if (phi.parent == from)
			return true;
	return false;
}

void Compiler::add_loop_level()
{
	current_loop_level++;
}
