// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "SpirvShader.hpp"
#include "SpirvShaderDebug.hpp"

#include "ShaderCore.hpp"

#include "Vulkan/VkPipelineLayout.hpp"

#include <spirv/unified1/spirv.hpp>

namespace sw {

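// Emits code for OpLoad and OpAtomicLoad: each scalar element of the result is
// read through the SIMD pointer of the Pointer operand, using the storage
// class's out-of-bounds behavior and, for atomic loads, the memory order
// derived from the Memory Semantics operand.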
SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
{
    bool atomic = (insn.opcode() == spv::OpAtomicLoad);
    Object::ID resultId = insn.word(2);
    Object::ID pointerId = insn.word(3);
    auto &result = getObject(resultId);
    auto &resultTy = getType(result);
    auto &pointer = getObject(pointerId);
    auto &pointerTy = getType(pointer);
    std::memory_order memoryOrder = std::memory_order_relaxed;

    ASSERT(getType(pointer).element == result.typeId());
    ASSERT(Type::ID(insn.word(1)) == result.typeId());
    ASSERT(!atomic || getType(getType(pointer).element).opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."

    if(pointerTy.storageClass == spv::StorageClassUniformConstant)
    {
        // Just propagate the pointer.
        auto &ptr = state->getPointer(pointerId);
        state->createPointer(resultId, ptr);
        return EmitResult::Continue;
    }

    if(atomic)
    {
        Object::ID semanticsId = insn.word(5);
        auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
        memoryOrder = MemoryOrder(memorySemantics);
    }

    auto ptr = GetPointerToData(pointerId, 0, state);
    bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
    auto &dst = state->createIntermediate(resultId, resultTy.componentCount);
    auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);

    VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
        auto p = ptr + el.offset;
        if(interleavedByLane) { p = InterleaveByLane(p); }  // TODO: Interleave once, then add offset?
        dst.move(el.index, p.Load<SIMD::Float>(robustness, state->activeLaneMask(), atomic, memoryOrder));
    });

    SPIRV_SHADER_DBG("Load(atomic: {0}, order: {1}, ptr: {2}, val: {3}, mask: {4})", atomic, int(memoryOrder), ptr, dst, state->activeLaneMask());

    return EmitResult::Continue;
}

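// Emits code for OpStore and OpAtomicStore: decodes the pointer, value and
// (for atomics) memory semantics operands, then delegates the per-element
// writes to Store().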
SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
{
    bool atomic = (insn.opcode() == spv::OpAtomicStore);
    Object::ID pointerId = insn.word(1);
    Object::ID objectId = insn.word(atomic ? 4 : 2);
    std::memory_order memoryOrder = std::memory_order_relaxed;

    if(atomic)
    {
        Object::ID semanticsId = insn.word(3);
        auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
        memoryOrder = MemoryOrder(memorySemantics);
    }

    const auto &value = Operand(this, state, objectId);

    Store(pointerId, value, atomic, memoryOrder, state);

    return EmitResult::Continue;
}

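// Writes 'value' element by element through the pointer identified by
// pointerId. Writes are masked by the active lane mask and, for storage
// classes where helper invocations must not perform visible stores, by the
// stores-and-atomics mask as well.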
void SpirvShader::Store(Object::ID pointerId, const Operand &value, bool atomic, std::memory_order memoryOrder, EmitState *state) const
{
    auto &pointer = getObject(pointerId);
    auto &pointerTy = getType(pointer);
    auto &elementTy = getType(pointerTy.element);

    ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."

    auto ptr = GetPointerToData(pointerId, 0, state);
    bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
    auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);

    SIMD::Int mask = state->activeLaneMask();
    if(!StoresInHelperInvocation(pointerTy.storageClass))
    {
        mask = mask & state->storesAndAtomicsMask();
    }

    SPIRV_SHADER_DBG("Store(atomic: {0}, order: {1}, ptr: {2}, val: {3}, mask: {4})", atomic, int(memoryOrder), ptr, value, mask);

    VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
        auto p = ptr + el.offset;
        if(interleavedByLane) { p = InterleaveByLane(p); }
        p.Store(value.Float(el.index), robustness, mask, atomic, memoryOrder);
    });
}

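// Emits code for OpVariable: creates the SIMD pointer for the result, with the
// base address chosen according to the storage class (routine-local memory,
// workgroup memory, descriptor sets, or push constants), and applies the
// optional constant initializer.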
SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
{
    auto routine = state->routine;
    Object::ID resultId = insn.word(2);
    auto &object = getObject(resultId);
    auto &objectTy = getType(object);

    switch(objectTy.storageClass)
    {
    case spv::StorageClassOutput:
    case spv::StorageClassPrivate:
    case spv::StorageClassFunction:
    {
        ASSERT(objectTy.opcode() == spv::OpTypePointer);
        auto base = &routine->getVariable(resultId)[0];
        auto elementTy = getType(objectTy.element);
        auto size = elementTy.componentCount * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
        state->createPointer(resultId, SIMD::Pointer(base, size));
        break;
    }
    case spv::StorageClassWorkgroup:
    {
        ASSERT(objectTy.opcode() == spv::OpTypePointer);
        auto base = &routine->workgroupMemory[0];
        auto size = workgroupMemory.size();
        state->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
        break;
    }
    case spv::StorageClassInput:
    {
        if(object.kind == Object::Kind::InterfaceVariable)
        {
            auto &dst = routine->getVariable(resultId);
            int offset = 0;
            VisitInterface(resultId,
                           [&](Decorations const &d, AttribType type) {
                               auto scalarSlot = d.Location << 2 | d.Component;
                               dst[offset++] = routine->inputs[scalarSlot];
                           });
        }
        ASSERT(objectTy.opcode() == spv::OpTypePointer);
        auto base = &routine->getVariable(resultId)[0];
        auto elementTy = getType(objectTy.element);
        auto size = elementTy.componentCount * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
        state->createPointer(resultId, SIMD::Pointer(base, size));
        break;
    }
    case spv::StorageClassUniformConstant:
    {
        const auto &d = descriptorDecorations.at(resultId);
        ASSERT(d.DescriptorSet >= 0);
        ASSERT(d.Binding >= 0);

        uint32_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
        Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet];  // DescriptorSet*
        Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset);    // vk::SampledImageDescriptor*
        auto size = 0;  // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
        state->createPointer(resultId, SIMD::Pointer(binding, size));
        break;
    }
    case spv::StorageClassUniform:
    case spv::StorageClassStorageBuffer:
    {
        const auto &d = descriptorDecorations.at(resultId);
        ASSERT(d.DescriptorSet >= 0);
        auto size = 0;  // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
        // Note: the module may contain descriptor set references that are not suitable for this implementation -- using a set index higher than the number
        // of descriptor set binding points we support. As long as the selected entrypoint doesn't actually touch the out of range binding points, this
        // is valid. In this case make the value nullptr to make it easier to diagnose an attempt to dereference it.
        if(d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS)
        {
            state->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
        }
        else
        {
            state->createPointer(resultId, SIMD::Pointer(nullptr, 0));
        }
        break;
    }
    case spv::StorageClassPushConstant:
    {
        state->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
        break;
    }
    default:
        UNREACHABLE("Storage class %d", objectTy.storageClass);
        break;
    }

    if(insn.wordCount() > 4)
    {
        Object::ID initializerId = insn.word(4);
        if(getObject(initializerId).kind != Object::Kind::Constant)
        {
            UNIMPLEMENTED("b/148241854: Non-constant initializers not yet implemented");  // FIXME(b/148241854)
        }

        switch(objectTy.storageClass)
        {
        case spv::StorageClassOutput:
        case spv::StorageClassPrivate:
        case spv::StorageClassFunction:
        {
            bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass);
            auto ptr = GetPointerToData(resultId, 0, state);
            Operand initialValue(this, state, initializerId);
            VisitMemoryObject(resultId, [&](const MemoryElement &el) {
                auto p = ptr + el.offset;
                if(interleavedByLane) { p = InterleaveByLane(p); }
                auto robustness = OutOfBoundsBehavior::UndefinedBehavior;  // Local variables are always within bounds.
                p.Store(initialValue.Float(el.index), robustness, state->activeLaneMask());
            });
            break;
        }
        default:
            ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass));
        }
    }

    return EmitResult::Continue;
}

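// Emits code for OpCopyMemory. Source and destination may use different memory
// layouts, so the source offsets are first gathered per element index and then
// matched against the destination elements while copying.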
SpirvShader::EmitResult SpirvShader::EmitCopyMemory(InsnIterator insn, EmitState *state) const
{
    Object::ID dstPtrId = insn.word(1);
    Object::ID srcPtrId = insn.word(2);
    auto &dstPtrTy = getType(getObject(dstPtrId));
    auto &srcPtrTy = getType(getObject(srcPtrId));
    ASSERT(dstPtrTy.element == srcPtrTy.element);

    bool dstInterleavedByLane = IsStorageInterleavedByLane(dstPtrTy.storageClass);
    bool srcInterleavedByLane = IsStorageInterleavedByLane(srcPtrTy.storageClass);
    auto dstPtr = GetPointerToData(dstPtrId, 0, state);
    auto srcPtr = GetPointerToData(srcPtrId, 0, state);

    std::unordered_map<uint32_t, uint32_t> srcOffsets;

    VisitMemoryObject(srcPtrId, [&](const MemoryElement &el) { srcOffsets[el.index] = el.offset; });

    VisitMemoryObject(dstPtrId, [&](const MemoryElement &el) {
        auto it = srcOffsets.find(el.index);
        ASSERT(it != srcOffsets.end());
        auto srcOffset = it->second;
        auto dstOffset = el.offset;

        auto dst = dstPtr + dstOffset;
        auto src = srcPtr + srcOffset;
        if(dstInterleavedByLane) { dst = InterleaveByLane(dst); }
        if(srcInterleavedByLane) { src = InterleaveByLane(src); }

        // TODO(b/131224163): Optimize based on src/dst storage classes.
        auto robustness = OutOfBoundsBehavior::RobustBufferAccess;

        auto value = src.Load<SIMD::Float>(robustness, state->activeLaneMask());
        dst.Store(value, robustness, state->activeLaneMask());
    });
    return EmitResult::Continue;
}

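// Emits code for OpMemoryBarrier: issues a fence derived from the Memory
// Semantics operand.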
SpirvShader::EmitResult SpirvShader::EmitMemoryBarrier(InsnIterator insn, EmitState *state) const
{
    auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(2)));
    // TODO: We probably want to consider the memory scope here. For now,
    // just always emit the full fence.
    Fence(semantics);
    return EmitResult::Continue;
}

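// Recursively walks a type with explicit layout decorations, invoking the
// visitor with the element index, byte offset and type of every scalar leaf.
// Offset, ArrayStride, MatrixStride and RowMajor decorations determine the
// offsets of nested elements.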
void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &f) const
{
    ApplyDecorationsForId(&d, id);
    auto const &type = getType(id);

    if(d.HasOffset)
    {
        offset += d.Offset;
        d.HasOffset = false;
    }

    switch(type.opcode())
    {
    case spv::OpTypePointer:
        VisitMemoryObjectInner(type.definition.word(3), d, index, offset, f);
        break;
    case spv::OpTypeInt:
    case spv::OpTypeFloat:
    case spv::OpTypeRuntimeArray:
        f(MemoryElement{ index++, offset, type });
        break;
    case spv::OpTypeVector:
    {
        auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
        for(auto i = 0u; i < type.definition.word(3); i++)
        {
            VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
        }
        break;
    }
    case spv::OpTypeMatrix:
    {
        auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
        d.InsideMatrix = true;
        for(auto i = 0u; i < type.definition.word(3); i++)
        {
            ASSERT(d.HasMatrixStride);
            VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f);
        }
        break;
    }
    case spv::OpTypeStruct:
        for(auto i = 0u; i < type.definition.wordCount() - 2; i++)
        {
            ApplyDecorationsForIdMember(&d, id, i);
            VisitMemoryObjectInner(type.definition.word(i + 2), d, index, offset, f);
        }
        break;
    case spv::OpTypeArray:
    {
        auto arraySize = GetConstScalarInt(type.definition.word(3));
        for(auto i = 0u; i < arraySize; i++)
        {
            ASSERT(d.HasArrayStride);
            VisitMemoryObjectInner(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
        }
        break;
    }
    default:
        UNREACHABLE("%s", OpcodeName(type.opcode()));
    }
}

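// Invokes the visitor for every scalar element of the object pointed to by id.
// Objects with explicit layout are traversed through their decorations;
// everything else is treated as tightly packed scalars.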
void SpirvShader::VisitMemoryObject(Object::ID id, const MemoryVisitor &f) const
{
    auto typeId = getObject(id).typeId();
    auto const &type = getType(typeId);

    if(IsExplicitLayout(type.storageClass))
    {
        Decorations d{};
        ApplyDecorationsForId(&d, id);
        uint32_t index = 0;
        VisitMemoryObjectInner(typeId, d, index, 0, f);
    }
    else
    {
        // Objects without explicit layout are tightly packed.
        auto &elType = getType(type.element);
        for(auto index = 0u; index < elType.componentCount; index++)
        {
            auto offset = static_cast<uint32_t>(index * sizeof(float));
            f({ index, offset, elType });
        }
    }
}

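// Returns a SIMD pointer to the start of the data referenced by id. For
// descriptor-backed objects this dereferences the buffer descriptor, applying
// the dynamic offset and robustness size where the binding is dynamic.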
SIMD::Pointer SpirvShader::GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const
{
    auto routine = state->routine;
    auto &object = getObject(id);
    switch(object.kind)
    {
    case Object::Kind::Pointer:
    case Object::Kind::InterfaceVariable:
        return state->getPointer(id);

    case Object::Kind::DescriptorSet:
    {
        const auto &d = descriptorDecorations.at(id);
        ASSERT(d.DescriptorSet >= 0 && d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS);
        ASSERT(d.Binding >= 0);
        ASSERT(routine->pipelineLayout->getDescriptorCount(d.DescriptorSet, d.Binding) != 0);  // "If descriptorCount is zero this binding entry is reserved and the resource must not be accessed from any stage via this binding within any pipeline using the set layout."

        uint32_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
        uint32_t descriptorSize = routine->pipelineLayout->getDescriptorSize(d.DescriptorSet, d.Binding);
        Int descriptorOffset = bindingOffset + descriptorSize * arrayIndex;

        auto set = state->getPointer(id);
        Pointer<Byte> descriptor = set.base + descriptorOffset;                                        // BufferDescriptor*
        Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr));  // void*
        Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));

        if(routine->pipelineLayout->isDescriptorDynamic(d.DescriptorSet, d.Binding))
        {
            Int dynamicOffsetIndex =
                routine->pipelineLayout->getDynamicOffsetIndex(d.DescriptorSet, d.Binding) +
                arrayIndex;
            Int offset = routine->descriptorDynamicOffsets[dynamicOffsetIndex];
            Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));

            return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
        }
        else
        {
            return SIMD::Pointer(data, size);
        }
    }

    default:
        UNREACHABLE("Invalid pointer kind %d", int(object.kind));
        return SIMD::Pointer(Pointer<Byte>(), 0);
    }
}

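// Maps SPIR-V memory semantics onto the closest std::memory_order. Per the
// Vulkan 1.1 spec, SequentiallyConsistent is treated as AcquireRelease.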
std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
{
    auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>(
        spv::MemorySemanticsAcquireMask |
        spv::MemorySemanticsReleaseMask |
        spv::MemorySemanticsAcquireReleaseMask |
        spv::MemorySemanticsSequentiallyConsistentMask);
    switch(control)
    {
    case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
    case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
    case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
    case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
    case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
    default:
        // "it is invalid for more than one of these four bits to be set:
        // Acquire, Release, AcquireRelease, or SequentiallyConsistent."
        UNREACHABLE("MemorySemanticsMask: %x", int(control));
        return std::memory_order_acq_rel;
    }
}

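// Returns whether stores of the given storage class are still performed by
// helper invocations. Buffer and image writes from helper invocations must not
// become visible, so those storage classes return false and get masked by
// storesAndAtomicsMask().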
bool SpirvShader::StoresInHelperInvocation(spv::StorageClass storageClass)
{
    switch(storageClass)
    {
    case spv::StorageClassUniform:
    case spv::StorageClassStorageBuffer:
    case spv::StorageClassImage:
        return false;
    default:
        return true;
    }
}

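// Returns whether the storage class requires explicit layout decorations
// (Offset, ArrayStride, MatrixStride), i.e. uniform, storage buffer and push
// constant blocks.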
bool SpirvShader::IsExplicitLayout(spv::StorageClass storageClass)
{
    switch(storageClass)
    {
    case spv::StorageClassUniform:
    case spv::StorageClassStorageBuffer:
    case spv::StorageClassPushConstant:
        return true;
    default:
        return false;
    }
}

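// Converts a pointer to per-invocation data into one where each SIMD lane
// addresses its own copy: offsets are scaled by SIMD::Width and each lane's
// static offset is advanced by its lane index times sizeof(float).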
sw::SIMD::Pointer SpirvShader::InterleaveByLane(sw::SIMD::Pointer p)
{
    p *= sw::SIMD::Width;
    p.staticOffsets[0] += 0 * sizeof(float);
    p.staticOffsets[1] += 1 * sizeof(float);
    p.staticOffsets[2] += 2 * sizeof(float);
    p.staticOffsets[3] += 3 * sizeof(float);
    return p;
}

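// Returns whether the storage class keeps one copy of each value per SIMD
// lane. Shared resources (buffers, push constants, workgroup and image memory)
// hold a single copy; all other storage is per-invocation and thus interleaved
// by lane.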
bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
{
    switch(storageClass)
    {
    case spv::StorageClassUniform:
    case spv::StorageClassStorageBuffer:
    case spv::StorageClassPushConstant:
    case spv::StorageClassWorkgroup:
    case spv::StorageClassImage:
        return false;
    default:
        return true;
    }
}

}  // namespace sw