// Copyright 2018 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "VkPipeline.hpp"

#include "VkDestroy.hpp"
#include "VkDevice.hpp"
#include "VkPipelineCache.hpp"
#include "VkPipelineLayout.hpp"
#include "VkRenderPass.hpp"
#include "VkShaderModule.hpp"
#include "VkStringify.hpp"
#include "Pipeline/ComputeProgram.hpp"
#include "Pipeline/SpirvShader.hpp"

#include "marl/trace.h"

#include "spirv-tools/optimizer.hpp"

#include <chrono>
#include <iostream>
#include <memory>
#include <unordered_map>
#include <vector>

namespace {

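// Note: the profiler below is held in a function-local static, so a single instance is created
// lazily on first use and shared by every pipeline for the lifetime of the process.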
std::shared_ptr<sw::SpirvProfiler> getOrCreateSpirvProfiler()
{
	const sw::Configuration &config = sw::getConfiguration();
	static std::shared_ptr<sw::SpirvProfiler> profiler = config.enableSpirvProfiling ? std::make_shared<sw::SpirvProfiler>(config) : nullptr;
	return profiler;
}

// optimizeSpirv() applies and freezes specializations into constants, and runs spirv-opt.
sw::SpirvBinary optimizeSpirv(const vk::PipelineCache::SpirvBinaryKey &key)
{
	const sw::SpirvBinary &code = key.getBinary();
	const VkSpecializationInfo *specializationInfo = key.getSpecializationInfo();
	bool optimize = key.getOptimization();

	spvtools::Optimizer opt{ vk::SPIRV_VERSION };

	opt.SetMessageConsumer([](spv_message_level_t level, const char *source, const spv_position_t &position, const char *message) {
		switch(level)
		{
		case SPV_MSG_FATAL: sw::warn("SPIR-V FATAL: %d:%d %s\n", int(position.line), int(position.column), message); break;
		case SPV_MSG_INTERNAL_ERROR: sw::warn("SPIR-V INTERNAL_ERROR: %d:%d %s\n", int(position.line), int(position.column), message); break;
		case SPV_MSG_ERROR: sw::warn("SPIR-V ERROR: %d:%d %s\n", int(position.line), int(position.column), message); break;
		case SPV_MSG_WARNING: sw::warn("SPIR-V WARNING: %d:%d %s\n", int(position.line), int(position.column), message); break;
		case SPV_MSG_INFO: sw::trace("SPIR-V INFO: %d:%d %s\n", int(position.line), int(position.column), message); break;
		case SPV_MSG_DEBUG: sw::trace("SPIR-V DEBUG: %d:%d %s\n", int(position.line), int(position.column), message); break;
		default: sw::trace("SPIR-V MESSAGE: %d:%d %s\n", int(position.line), int(position.column), message); break;
		}
	});

	// If the pipeline uses specialization, apply the specializations before freezing
	if(specializationInfo)
	{
		std::unordered_map<uint32_t, std::vector<uint32_t>> specializations;
		const uint8_t *specializationData = static_cast<const uint8_t *>(specializationInfo->pData);

		for(uint32_t i = 0; i < specializationInfo->mapEntryCount; i++)
		{
			const VkSpecializationMapEntry &entry = specializationInfo->pMapEntries[i];
			const uint8_t *value_ptr = specializationData + entry.offset;
			std::vector<uint32_t> value(reinterpret_cast<const uint32_t *>(value_ptr),
			                            reinterpret_cast<const uint32_t *>(value_ptr + entry.size));
			specializations.emplace(entry.constantID, std::move(value));
		}

		opt.RegisterPass(spvtools::CreateSetSpecConstantDefaultValuePass(specializations));
	}
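
	// For reference, the VkSpecializationInfo consumed above is supplied by the application at
	// pipeline creation time. An illustrative sketch (struct and field names are the Vulkan
	// API's; the constant ID and value are hypothetical):
	//
	//   const uint32_t workgroupSizeX = 64;
	//   const VkSpecializationMapEntry entry = { /* constantID */ 0, /* offset */ 0, /* size */ sizeof(workgroupSizeX) };
	//   const VkSpecializationInfo specInfo = { /* mapEntryCount */ 1, &entry, sizeof(workgroupSizeX), &workgroupSizeX };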

	if(optimize)
	{
		// Remove DontInline flags so the optimizer force-inlines all functions,
		// as we currently don't support OpFunctionCall (b/141246700).
		opt.RegisterPass(spvtools::CreateRemoveDontInlinePass());

		// Full optimization list taken from spirv-opt.
		opt.RegisterPerformancePasses();
	}

	spvtools::OptimizerOptions optimizerOptions = {};
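
	// Note: the optimized SPIR-V is only re-validated in debug builds; release (NDEBUG) builds
	// skip validation to keep pipeline creation fast. The validator options below correspond to
	// the extensions noted in their comments, so shaders relying on them validate cleanly.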
#if defined(NDEBUG)
	optimizerOptions.set_run_validator(false);
#else
	optimizerOptions.set_run_validator(true);
	spvtools::ValidatorOptions validatorOptions = {};
	validatorOptions.SetScalarBlockLayout(true);            // VK_EXT_scalar_block_layout
	validatorOptions.SetUniformBufferStandardLayout(true);  // VK_KHR_uniform_buffer_standard_layout
	validatorOptions.SetAllowLocalSizeId(true);             // VK_KHR_maintenance4
	optimizerOptions.set_validator_options(validatorOptions);
#endif

	sw::SpirvBinary optimized;
	opt.Run(code.data(), code.size(), &optimized, optimizerOptions);
	ASSERT(optimized.size() > 0);

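	// Debugging aid (not normally compiled in): flip the condition below to true to dump the
	// pre- and post-optimization SPIR-V disassembly to stdout when diagnosing optimizer issues.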
	if(false)
	{
		spvtools::SpirvTools core(vk::SPIRV_VERSION);
		std::string preOpt;
		core.Disassemble(code, &preOpt, SPV_BINARY_TO_TEXT_OPTION_NONE);
		std::string postOpt;
		core.Disassemble(optimized, &postOpt, SPV_BINARY_TO_TEXT_OPTION_NONE);
		std::cout << "PRE-OPT: " << preOpt << std::endl
		          << "POST-OPT: " << postOpt << std::endl;
	}

	return optimized;
}

std::shared_ptr<sw::ComputeProgram> createProgram(vk::Device *device, std::shared_ptr<sw::SpirvShader> shader, const vk::PipelineLayout *layout)
{
	MARL_SCOPED_EVENT("createProgram");

	vk::DescriptorSet::Bindings descriptorSets;  // TODO(b/129523279): Delay code generation until dispatch time.
	// TODO(b/119409619): use allocator.
	auto program = std::make_shared<sw::ComputeProgram>(device, shader, layout, descriptorSets);
	program->generate();
	program->finalize("ComputeProgram");

	return program;
}

class PipelineCreationFeedback
{
public:
	PipelineCreationFeedback(const VkGraphicsPipelineCreateInfo *pCreateInfo)
	    : pipelineCreationFeedback(GetPipelineCreationFeedback(pCreateInfo->pNext))
	{
		pipelineCreationBegins();
	}

	PipelineCreationFeedback(const VkComputePipelineCreateInfo *pCreateInfo)
	    : pipelineCreationFeedback(GetPipelineCreationFeedback(pCreateInfo->pNext))
	{
		pipelineCreationBegins();
	}

	~PipelineCreationFeedback()
	{
		pipelineCreationEnds();
	}

	void stageCreationBegins(uint32_t stage)
	{
		if(pipelineCreationFeedback)
		{
			// Record stage creation begin time
			pipelineCreationFeedback->pPipelineStageCreationFeedbacks[stage].duration = now();
		}
	}

	void cacheHit(uint32_t stage)
	{
		if(pipelineCreationFeedback)
		{
			pipelineCreationFeedback->pPipelineCreationFeedback->flags |=
			    VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
			pipelineCreationFeedback->pPipelineStageCreationFeedbacks[stage].flags |=
			    VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
		}
	}

	void stageCreationEnds(uint32_t stage)
	{
		if(pipelineCreationFeedback)
		{
			pipelineCreationFeedback->pPipelineStageCreationFeedbacks[stage].flags |=
			    VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
			pipelineCreationFeedback->pPipelineStageCreationFeedbacks[stage].duration =
			    now() - pipelineCreationFeedback->pPipelineStageCreationFeedbacks[stage].duration;
		}
	}

	void pipelineCreationError()
	{
		clear();
		pipelineCreationFeedback = nullptr;
	}

private:
	static const VkPipelineCreationFeedbackCreateInfo *GetPipelineCreationFeedback(const void *pNext)
	{
		const VkBaseInStructure *extensionCreateInfo = reinterpret_cast<const VkBaseInStructure *>(pNext);
		while(extensionCreateInfo)
		{
			if(extensionCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO)
			{
				return reinterpret_cast<const VkPipelineCreationFeedbackCreateInfo *>(extensionCreateInfo);
			}

			extensionCreateInfo = extensionCreateInfo->pNext;
		}

		return nullptr;
	}
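
	// For reference, an application opts into this feedback by chaining the structure into the
	// pipeline create info's pNext. An illustrative sketch (struct and field names are the
	// Vulkan API's; variable names are hypothetical):
	//
	//   VkPipelineCreationFeedback pipelineFeedback = {};
	//   VkPipelineCreationFeedback stageFeedbacks[2] = {};
	//   VkPipelineCreationFeedbackCreateInfo feedbackInfo = {};
	//   feedbackInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO;
	//   feedbackInfo.pPipelineCreationFeedback = &pipelineFeedback;
	//   feedbackInfo.pipelineStageCreationFeedbackCount = 2;
	//   feedbackInfo.pPipelineStageCreationFeedbacks = stageFeedbacks;
	//   graphicsPipelineCreateInfo.pNext = &feedbackInfo;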

	void pipelineCreationBegins()
	{
		if(pipelineCreationFeedback)
		{
			clear();

			// Record pipeline creation begin time
			pipelineCreationFeedback->pPipelineCreationFeedback->duration = now();
		}
	}

	void pipelineCreationEnds()
	{
		if(pipelineCreationFeedback)
		{
			pipelineCreationFeedback->pPipelineCreationFeedback->flags |=
			    VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
			pipelineCreationFeedback->pPipelineCreationFeedback->duration =
			    now() - pipelineCreationFeedback->pPipelineCreationFeedback->duration;
		}
	}

	void clear()
	{
		if(pipelineCreationFeedback)
		{
			// Clear all flags and durations
			pipelineCreationFeedback->pPipelineCreationFeedback->flags = 0;
			pipelineCreationFeedback->pPipelineCreationFeedback->duration = 0;
			for(uint32_t i = 0; i < pipelineCreationFeedback->pipelineStageCreationFeedbackCount; i++)
			{
				pipelineCreationFeedback->pPipelineStageCreationFeedbacks[i].flags = 0;
				pipelineCreationFeedback->pPipelineStageCreationFeedbacks[i].duration = 0;
			}
		}
	}

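	// Timestamps below are in nanoseconds since the system clock epoch; VkPipelineCreationFeedback
	// durations are reported in nanoseconds, so the begin/end difference can be stored directly.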
	uint64_t now()
	{
		return std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
	}

	const VkPipelineCreationFeedbackCreateInfo *pipelineCreationFeedback = nullptr;
};

}  // anonymous namespace

namespace vk {

Pipeline::Pipeline(PipelineLayout *layout, Device *device)
    : layout(layout)
    , device(device)
    , robustBufferAccess(device->getEnabledFeatures().robustBufferAccess)
{
	layout->incRefCount();
}

void Pipeline::destroy(const VkAllocationCallbacks *pAllocator)
{
	destroyPipeline(pAllocator);

	vk::release(static_cast<VkPipelineLayout>(*layout), pAllocator);
}

GraphicsPipeline::GraphicsPipeline(const VkGraphicsPipelineCreateInfo *pCreateInfo, void *mem, Device *device)
    : Pipeline(vk::Cast(pCreateInfo->layout), device)
    , state(device, pCreateInfo, layout, robustBufferAccess)
    , inputs(pCreateInfo->pVertexInputState)
{
}

void GraphicsPipeline::destroyPipeline(const VkAllocationCallbacks *pAllocator)
{
	vertexShader.reset();
	fragmentShader.reset();
}

size_t GraphicsPipeline::ComputeRequiredAllocationSize(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
	return 0;
}

void GraphicsPipeline::getIndexBuffers(const vk::DynamicState &dynamicState, uint32_t count, uint32_t first, bool indexed, std::vector<std::pair<uint32_t, void *>> *indexBuffers) const
{
	VkPrimitiveTopology topology = state.hasDynamicTopology() ? dynamicState.primitiveTopology : state.getTopology();
	indexBuffer.getIndexBuffers(topology, count, first, indexed, state.hasPrimitiveRestartEnable(), indexBuffers);
}

bool GraphicsPipeline::containsImageWrite() const
{
	return (vertexShader.get() && vertexShader->containsImageWrite()) ||
	       (fragmentShader.get() && fragmentShader->containsImageWrite());
}

void GraphicsPipeline::setShader(const VkShaderStageFlagBits &stage, const std::shared_ptr<sw::SpirvShader> spirvShader)
{
	switch(stage)
	{
	case VK_SHADER_STAGE_VERTEX_BIT:
		ASSERT(vertexShader.get() == nullptr);
		vertexShader = spirvShader;
		break;

	case VK_SHADER_STAGE_FRAGMENT_BIT:
		ASSERT(fragmentShader.get() == nullptr);
		fragmentShader = spirvShader;
		break;

	default:
		UNSUPPORTED("Unsupported stage");
		break;
	}
}

const std::shared_ptr<sw::SpirvShader> GraphicsPipeline::getShader(const VkShaderStageFlagBits &stage) const
{
	switch(stage)
	{
	case VK_SHADER_STAGE_VERTEX_BIT:
		return vertexShader;
	case VK_SHADER_STAGE_FRAGMENT_BIT:
		return fragmentShader;
	default:
		UNSUPPORTED("Unsupported stage");
		return fragmentShader;
	}
}

VkResult GraphicsPipeline::compileShaders(const VkAllocationCallbacks *pAllocator, const VkGraphicsPipelineCreateInfo *pCreateInfo, PipelineCache *pPipelineCache)
{
	PipelineCreationFeedback pipelineCreationFeedback(pCreateInfo);

	for(uint32_t stageIndex = 0; stageIndex < pCreateInfo->stageCount; stageIndex++)
	{
		const VkPipelineShaderStageCreateInfo &stageInfo = pCreateInfo->pStages[stageIndex];

		pipelineCreationFeedback.stageCreationBegins(stageIndex);

		if((stageInfo.flags &
		    ~(VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT |
		      VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT)) != 0)
		{
			UNSUPPORTED("pStage->flags %d", int(stageInfo.flags));
		}

		auto dbgctx = device->getDebuggerContext();
		// Do not optimize the shader if we have a debugger context.
		// Optimization passes are likely to damage debug information, and reorder
		// instructions.
		const bool optimize = !dbgctx;

		const ShaderModule *module = vk::Cast(stageInfo.module);
		const PipelineCache::SpirvBinaryKey key(module->getBinary(), stageInfo.pSpecializationInfo, optimize);

		if((pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) &&
		   (!pPipelineCache || !pPipelineCache->contains(key)))
		{
			pipelineCreationFeedback.pipelineCreationError();
			return VK_PIPELINE_COMPILE_REQUIRED_EXT;
		}

		sw::SpirvBinary spirv;

		if(pPipelineCache)
		{
			auto onCacheMiss = [&] { return optimizeSpirv(key); };
			auto onCacheHit = [&] { pipelineCreationFeedback.cacheHit(stageIndex); };
			spirv = pPipelineCache->getOrOptimizeSpirv(key, onCacheMiss, onCacheHit);
		}
		else
		{
			spirv = optimizeSpirv(key);

			// If the pipeline does not have specialization constants, there's a 1-to-1 mapping between the unoptimized and optimized SPIR-V,
			// so we should use a 1-to-1 mapping of the identifiers to avoid JIT routine recompiles.
			if(!key.getSpecializationInfo())
			{
				spirv.mapOptimizedIdentifier(key.getBinary());
			}
		}

		// TODO(b/201798871): use allocator.
		auto shader = std::make_shared<sw::SpirvShader>(stageInfo.stage, stageInfo.pName, spirv,
		                                                vk::Cast(pCreateInfo->renderPass), pCreateInfo->subpass, robustBufferAccess, dbgctx, getOrCreateSpirvProfiler());

		setShader(stageInfo.stage, shader);

		pipelineCreationFeedback.stageCreationEnds(stageIndex);
	}

	return VK_SUCCESS;
}

ComputePipeline::ComputePipeline(const VkComputePipelineCreateInfo *pCreateInfo, void *mem, Device *device)
    : Pipeline(vk::Cast(pCreateInfo->layout), device)
{
}

void ComputePipeline::destroyPipeline(const VkAllocationCallbacks *pAllocator)
{
	shader.reset();
	program.reset();
}

size_t ComputePipeline::ComputeRequiredAllocationSize(const VkComputePipelineCreateInfo *pCreateInfo)
{
	return 0;
}

VkResult ComputePipeline::compileShaders(const VkAllocationCallbacks *pAllocator, const VkComputePipelineCreateInfo *pCreateInfo, PipelineCache *pPipelineCache)
{
	PipelineCreationFeedback pipelineCreationFeedback(pCreateInfo);
	pipelineCreationFeedback.stageCreationBegins(0);

	auto &stage = pCreateInfo->stage;
	const ShaderModule *module = vk::Cast(stage.module);

	ASSERT(shader.get() == nullptr);
	ASSERT(program.get() == nullptr);

	auto dbgctx = device->getDebuggerContext();
	// Do not optimize the shader if we have a debugger context.
	// Optimization passes are likely to damage debug information, and reorder
	// instructions.
	const bool optimize = !dbgctx;

	const PipelineCache::SpirvBinaryKey shaderKey(module->getBinary(), stage.pSpecializationInfo, optimize);

	if((pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) &&
	   (!pPipelineCache || !pPipelineCache->contains(shaderKey)))
	{
		pipelineCreationFeedback.pipelineCreationError();
		return VK_PIPELINE_COMPILE_REQUIRED_EXT;
	}

	sw::SpirvBinary spirv;

	if(pPipelineCache)
	{
		auto onCacheMiss = [&] { return optimizeSpirv(shaderKey); };
		auto onCacheHit = [&] { pipelineCreationFeedback.cacheHit(0); };
		spirv = pPipelineCache->getOrOptimizeSpirv(shaderKey, onCacheMiss, onCacheHit);
	}
	else
	{
		spirv = optimizeSpirv(shaderKey);

		// If the pipeline does not have specialization constants, there's a 1-to-1 mapping between the unoptimized and optimized SPIR-V,
		// so we should use a 1-to-1 mapping of the identifiers to avoid JIT routine recompiles.
		if(!shaderKey.getSpecializationInfo())
		{
			spirv.mapOptimizedIdentifier(shaderKey.getBinary());
		}
	}

	// TODO(b/201798871): use allocator.
	shader = std::make_shared<sw::SpirvShader>(stage.stage, stage.pName, spirv,
	                                           nullptr, 0, robustBufferAccess, dbgctx, getOrCreateSpirvProfiler());

	const PipelineCache::ComputeProgramKey programKey(shader->getIdentifier(), layout->identifier);

	if(pPipelineCache)
	{
		program = pPipelineCache->getOrCreateComputeProgram(programKey, [&] {
			return createProgram(device, shader, layout);
		});
	}
	else
	{
		program = createProgram(device, shader, layout);
	}

	pipelineCreationFeedback.stageCreationEnds(0);

	return VK_SUCCESS;
}

void ComputePipeline::run(uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
                          uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
                          vk::DescriptorSet::Array const &descriptorSetObjects,
                          vk::DescriptorSet::Bindings const &descriptorSets,
                          vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
                          vk::Pipeline::PushConstantStorage const &pushConstants)
{
	ASSERT_OR_RETURN(program != nullptr);
	program->run(
	    descriptorSetObjects, descriptorSets, descriptorDynamicOffsets, pushConstants,
	    baseGroupX, baseGroupY, baseGroupZ,
	    groupCountX, groupCountY, groupCountZ);
}

}  // namespace vk