//
// Copyright (C) 2018 Google, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//    Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//
//    Redistributions in binary form must reproduce the above
//    copyright notice, this list of conditions and the following
//    disclaimer in the documentation and/or other materials provided
//    with the distribution.
//
//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

//
// Post-processing for SPIR-V IR, in internal form, not standard binary form.
//

#include <cassert>
#include <cstdlib>

#include <unordered_map>
#include <unordered_set>
#include <algorithm>

#include "SpvBuilder.h"

#include "spirv.hpp"
#include "GlslangToSpv.h"
namespace spv {
    #include "GLSL.std.450.h"
    #include "GLSL.ext.KHR.h"
    #include "GLSL.ext.EXT.h"
    #include "GLSL.ext.AMD.h"
    #include "GLSL.ext.NV.h"
}
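// Note: the GLSL.* headers above define only enumerants and extension-name strings,
// so they are included inside the spv namespace to keep those names scoped alongside
// the core SPIR-V enums.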

namespace spv {

#ifndef GLSLANG_WEB
// Hook to visit each operand type and result type of an instruction.
// Will be called multiple times for one instruction, once for each typed
// operand and the result.
void Builder::postProcessType(const Instruction& inst, Id typeId)
{
    // Characterize the type being questioned
    Id basicTypeOp = getMostBasicTypeClass(typeId);
    int width = 0;
    if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt)
        width = getScalarTypeWidth(typeId);

    // Do opcode-specific checks
    switch (inst.getOpCode()) {
    case OpLoad:
    case OpStore:
        if (basicTypeOp == OpTypeStruct) {
            if (containsType(typeId, OpTypeInt, 8))
                addCapability(CapabilityInt8);
            if (containsType(typeId, OpTypeInt, 16))
                addCapability(CapabilityInt16);
            if (containsType(typeId, OpTypeFloat, 16))
                addCapability(CapabilityFloat16);
        } else {
            StorageClass storageClass = getStorageClass(inst.getIdOperand(0));
            if (width == 8) {
                switch (storageClass) {
                case StorageClassPhysicalStorageBufferEXT:
                case StorageClassUniform:
                case StorageClassStorageBuffer:
                case StorageClassPushConstant:
                    break;
                default:
                    addCapability(CapabilityInt8);
                    break;
                }
            } else if (width == 16) {
                switch (storageClass) {
                case StorageClassPhysicalStorageBufferEXT:
                case StorageClassUniform:
                case StorageClassStorageBuffer:
                case StorageClassPushConstant:
                case StorageClassInput:
                case StorageClassOutput:
                    break;
                default:
                    if (basicTypeOp == OpTypeInt)
                        addCapability(CapabilityInt16);
                    if (basicTypeOp == OpTypeFloat)
                        addCapability(CapabilityFloat16);
                    break;
                }
            }
        }
        break;
    case OpAccessChain:
    case OpPtrAccessChain:
    case OpCopyObject:
        break;
    case OpFConvert:
    case OpSConvert:
    case OpUConvert:
        // Look for any 8/16-bit storage capabilities. If there are none, assume that
        // the convert instruction requires the Float16/Int8/16 capability.
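        // (For example, an OpFConvert producing or consuming a 16-bit float when no
        // 16-bit storage capability is declared implies the 16-bit value is a real
        // arithmetic type, so CapabilityFloat16 itself is required.)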
        if (containsType(typeId, OpTypeFloat, 16) || containsType(typeId, OpTypeInt, 16)) {
            bool foundStorage = false;
            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
                spv::Capability cap = *it;
                if (cap == spv::CapabilityStorageInputOutput16 ||
                    cap == spv::CapabilityStoragePushConstant16 ||
                    cap == spv::CapabilityStorageUniformBufferBlock16 ||
                    cap == spv::CapabilityStorageUniform16) {
                    foundStorage = true;
                    break;
                }
            }
            if (!foundStorage) {
                if (containsType(typeId, OpTypeFloat, 16))
                    addCapability(CapabilityFloat16);
                if (containsType(typeId, OpTypeInt, 16))
                    addCapability(CapabilityInt16);
            }
        }
        if (containsType(typeId, OpTypeInt, 8)) {
            bool foundStorage = false;
            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
                spv::Capability cap = *it;
                if (cap == spv::CapabilityStoragePushConstant8 ||
                    cap == spv::CapabilityUniformAndStorageBuffer8BitAccess ||
                    cap == spv::CapabilityStorageBuffer8BitAccess) {
                    foundStorage = true;
                    break;
                }
            }
            if (!foundStorage) {
                addCapability(CapabilityInt8);
            }
        }
        break;
    case OpExtInst:
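        // For OpExtInst, operand 0 is the extended-instruction-set id and operand 1 is
        // the instruction number within that set.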
        switch (inst.getImmediateOperand(1)) {
        case GLSLstd450Frexp:
        case GLSLstd450FrexpStruct:
            if (getSpvVersion() < glslang::EShTargetSpv_1_3 && containsType(typeId, OpTypeInt, 16))
                addExtension(spv::E_SPV_AMD_gpu_shader_int16);
            break;
        case GLSLstd450InterpolateAtCentroid:
        case GLSLstd450InterpolateAtSample:
        case GLSLstd450InterpolateAtOffset:
            if (getSpvVersion() < glslang::EShTargetSpv_1_3 && containsType(typeId, OpTypeFloat, 16))
                addExtension(spv::E_SPV_AMD_gpu_shader_half_float);
            break;
        default:
            break;
        }
        break;
    default:
        if (basicTypeOp == OpTypeFloat && width == 16)
            addCapability(CapabilityFloat16);
        if (basicTypeOp == OpTypeInt && width == 16)
            addCapability(CapabilityInt16);
        if (basicTypeOp == OpTypeInt && width == 8)
            addCapability(CapabilityInt8);
        break;
    }
}

// Called for each instruction that resides in a block.
void Builder::postProcess(Instruction& inst)
{
    // Add capabilities based simply on the opcode.
    switch (inst.getOpCode()) {
    case OpExtInst:
        switch (inst.getImmediateOperand(1)) {
        case GLSLstd450InterpolateAtCentroid:
        case GLSLstd450InterpolateAtSample:
        case GLSLstd450InterpolateAtOffset:
            addCapability(CapabilityInterpolationFunction);
            break;
        default:
            break;
        }
        break;
    case OpDPdxFine:
    case OpDPdyFine:
    case OpFwidthFine:
    case OpDPdxCoarse:
    case OpDPdyCoarse:
    case OpFwidthCoarse:
        addCapability(CapabilityDerivativeControl);
        break;

    case OpImageQueryLod:
    case OpImageQuerySize:
    case OpImageQuerySizeLod:
    case OpImageQuerySamples:
    case OpImageQueryLevels:
        addCapability(CapabilityImageQuery);
        break;

    case OpGroupNonUniformPartitionNV:
        addExtension(E_SPV_NV_shader_subgroup_partitioned);
        addCapability(CapabilityGroupNonUniformPartitionedNV);
        break;

    case OpLoad:
    case OpStore:
        {
            // For any load/store to a PhysicalStorageBufferEXT, walk the access chain
            // index list to compute the misalignment. The pre-existing alignment value
            // (set via Builder::AccessChain::alignment) only accounts for the base of
            // the reference type and any scalar component selection in the access chain,
            // and this function computes the rest from the SPIR-V Offset decorations.
            Instruction *accessChain = module.getInstruction(inst.getIdOperand(0));
            if (accessChain->getOpCode() == OpAccessChain) {
                Instruction *base = module.getInstruction(accessChain->getIdOperand(0));
                // Get the type of the base of the access chain. It must be a pointer type.
                Id typeId = base->getTypeId();
                Instruction *type = module.getInstruction(typeId);
                assert(type->getOpCode() == OpTypePointer);
                if (type->getImmediateOperand(0) != StorageClassPhysicalStorageBufferEXT) {
                    break;
                }
                // Get the pointee type.
                typeId = type->getIdOperand(1);
                type = module.getInstruction(typeId);
                // Walk the index list for the access chain. For each index, find any
                // misalignment that can apply when accessing the member/element via
                // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all
                // together.
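                // For example, selecting a struct member decorated Offset 4 and then an
                // array element with ArrayStride 24 would OR together 4 and 24 here.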
                int alignment = 0;
                for (int i = 1; i < accessChain->getNumOperands(); ++i) {
                    Instruction *idx = module.getInstruction(accessChain->getIdOperand(i));
                    if (type->getOpCode() == OpTypeStruct) {
                        assert(idx->getOpCode() == OpConstant);
                        unsigned int c = idx->getImmediateOperand(0);

                        const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                            if (decoration.get()->getOpCode() == OpMemberDecorate &&
                                decoration.get()->getIdOperand(0) == typeId &&
                                decoration.get()->getImmediateOperand(1) == c &&
                                (decoration.get()->getImmediateOperand(2) == DecorationOffset ||
                                 decoration.get()->getImmediateOperand(2) == DecorationMatrixStride)) {
                                alignment |= decoration.get()->getImmediateOperand(3);
                            }
                        };
                        std::for_each(decorations.begin(), decorations.end(), function);
                        // Get the next member type
                        typeId = type->getIdOperand(c);
                        type = module.getInstruction(typeId);
                    } else if (type->getOpCode() == OpTypeArray ||
                               type->getOpCode() == OpTypeRuntimeArray) {
                        const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                            if (decoration.get()->getOpCode() == OpDecorate &&
                                decoration.get()->getIdOperand(0) == typeId &&
                                decoration.get()->getImmediateOperand(1) == DecorationArrayStride) {
                                alignment |= decoration.get()->getImmediateOperand(2);
                            }
                        };
                        std::for_each(decorations.begin(), decorations.end(), function);
                        // Get the element type
                        typeId = type->getIdOperand(0);
                        type = module.getInstruction(typeId);
                    } else {
                        // Once we get to any non-aggregate type, we're done.
                        break;
                    }
                }
                assert(inst.getNumOperands() >= 3);
                unsigned int memoryAccess = inst.getImmediateOperand((inst.getOpCode() == OpStore) ? 2 : 1);
                assert(memoryAccess & MemoryAccessAlignedMask);
                static_cast<void>(memoryAccess);
                // Compute the index of the alignment operand.
                int alignmentIdx = 2;
                if (inst.getOpCode() == OpStore)
                    alignmentIdx++;
                // Merge new and old (mis)alignment
                alignment |= inst.getImmediateOperand(alignmentIdx);
                // Pick the LSB
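                // (the lowest set bit of the merged value is the largest power of two
                // dividing every contributing offset/stride, i.e. the strongest alignment
                // that can still be guaranteed)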
                alignment = alignment & ~(alignment & (alignment-1));
                // Update the Aligned operand
                inst.setImmediateOperand(alignmentIdx, alignment);
            }
            break;
        }

    default:
        break;
    }

    // Checks based on type
    if (inst.getTypeId() != NoType)
        postProcessType(inst, inst.getTypeId());
    for (int op = 0; op < inst.getNumOperands(); ++op) {
        if (inst.isIdOperand(op)) {
            // In blocks, these are always result ids, but we are relying on
            // getTypeId() to return NoType for things like OpLabel.
            if (getTypeId(inst.getIdOperand(op)) != NoType)
                postProcessType(inst, getTypeId(inst.getIdOperand(op)));
        }
    }
}
#endif

// comment in header
void Builder::postProcessCFG()
{
    // reachableBlocks is the set of blocks reached via control flow, or which are
    // unreachable continue targets or unreachable merges.
    std::unordered_set<const Block*> reachableBlocks;
    std::unordered_map<Block*, Block*> headerForUnreachableContinue;
    std::unordered_set<Block*> unreachableMerges;
    std::unordered_set<Id> unreachableDefinitions;
    // Collect IDs defined in unreachable blocks. For each function, label the
    // reachable blocks first. Then for each unreachable block, collect the
    // result IDs of the instructions in it.
    for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
        Function* f = *fi;
        Block* entry = f->getEntryBlock();
        inReadableOrder(entry,
            [&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue]
            (Block* b, ReachReason why, Block* header) {
                reachableBlocks.insert(b);
                if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header;
                if (why == ReachDeadMerge) unreachableMerges.insert(b);
            });
        for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
            Block* b = *bi;
            if (unreachableMerges.count(b) != 0 || headerForUnreachableContinue.count(b) != 0) {
                auto ii = b->getInstructions().cbegin();
                ++ii; // Keep potential decorations on the label.
                for (; ii != b->getInstructions().cend(); ++ii)
                    unreachableDefinitions.insert(ii->get()->getResultId());
            } else if (reachableBlocks.count(b) == 0) {
                // The normal case for unreachable code. All definitions are considered dead.
                for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii)
                    unreachableDefinitions.insert(ii->get()->getResultId());
            }
        }
    }

    // Modify unreachable merge blocks and unreachable continue targets.
    // Delete their contents.
    for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) {
        (*mergeIter)->rewriteAsCanonicalUnreachableMerge();
    }
    for (auto continueIter = headerForUnreachableContinue.begin();
         continueIter != headerForUnreachableContinue.end();
         ++continueIter) {
        Block* continue_target = continueIter->first;
        Block* header = continueIter->second;
        continue_target->rewriteAsCanonicalUnreachableContinue(header);
    }

    // Remove unneeded decorations for unreachable instructions.
    decorations.erase(std::remove_if(decorations.begin(), decorations.end(),
        [&unreachableDefinitions](std::unique_ptr<Instruction>& I) -> bool {
            Id decoration_id = I.get()->getIdOperand(0);
            return unreachableDefinitions.count(decoration_id) != 0;
        }),
        decorations.end());
}

#ifndef GLSLANG_WEB
// comment in header
void Builder::postProcessFeatures() {
    // Add per-instruction capabilities, extensions, etc.

    // Look for any 8/16-bit type in the physical storage buffer class, and add the
    // appropriate capability. This happens in createSpvVariable for other storage
    // classes, but there isn't always a variable for physical storage buffer.
    for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) {
        Instruction* type = groupedTypes[OpTypePointer][t];
        if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) {
            if (containsType(type->getIdOperand(1), OpTypeInt, 8)) {
                addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);
                addCapability(spv::CapabilityStorageBuffer8BitAccess);
            }
            if (containsType(type->getIdOperand(1), OpTypeInt, 16) ||
                containsType(type->getIdOperand(1), OpTypeFloat, 16)) {
                addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
                addCapability(spv::CapabilityStorageBuffer16BitAccess);
            }
        }
    }

    // Process all block-contained instructions.
    for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
        Function* f = *fi;
        for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
            Block* b = *bi;
            for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++)
                postProcess(*ii->get());

            // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether
            // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the
            // default.
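            // (Aliased is the conservative choice: it does not let downstream compilers
            // assume this pointer is the only access path to the underlying memory.)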
            for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) {
                const Instruction& inst = *vi->get();
                Id resultId = inst.getResultId();
                if (containsPhysicalStorageBufferOrArray(getDerefTypeId(resultId))) {
                    bool foundDecoration = false;
                    const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                        if (decoration.get()->getIdOperand(0) == resultId &&
                            decoration.get()->getOpCode() == OpDecorate &&
                            (decoration.get()->getImmediateOperand(1) == spv::DecorationAliasedPointerEXT ||
                             decoration.get()->getImmediateOperand(1) == spv::DecorationRestrictPointerEXT)) {
                            foundDecoration = true;
                        }
                    };
                    std::for_each(decorations.begin(), decorations.end(), function);
                    if (!foundDecoration) {
                        addDecoration(resultId, spv::DecorationAliasedPointerEXT);
                    }
                }
            }
        }
    }
}
#endif

// comment in header
void Builder::postProcess() {
    postProcessCFG();
#ifndef GLSLANG_WEB
    postProcessFeatures();
#endif
}

}; // end spv namespace