• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // CLKernelVk.cpp: Implements the class methods for CLKernelVk.
7 
8 #include "common/PackedEnums.h"
9 
10 #include "libANGLE/renderer/vulkan/CLContextVk.h"
11 #include "libANGLE/renderer/vulkan/CLDeviceVk.h"
12 #include "libANGLE/renderer/vulkan/CLKernelVk.h"
13 #include "libANGLE/renderer/vulkan/CLMemoryVk.h"
14 #include "libANGLE/renderer/vulkan/CLProgramVk.h"
15 #include "libANGLE/renderer/vulkan/vk_wrapper.h"
16 
17 #include "libANGLE/CLBuffer.h"
18 #include "libANGLE/CLContext.h"
19 #include "libANGLE/CLKernel.h"
20 #include "libANGLE/CLProgram.h"
21 #include "libANGLE/cl_utils.h"
22 #include "spirv/unified1/NonSemanticClspvReflection.h"
23 
24 namespace rx
25 {
26 
CLKernelVk(const cl::Kernel & kernel,std::string & name,std::string & attributes,CLKernelArguments & args)27 CLKernelVk::CLKernelVk(const cl::Kernel &kernel,
28                        std::string &name,
29                        std::string &attributes,
30                        CLKernelArguments &args)
31     : CLKernelImpl(kernel),
32       mProgram(&kernel.getProgram().getImpl<CLProgramVk>()),
33       mContext(&kernel.getProgram().getContext().getImpl<CLContextVk>()),
34       mName(name),
35       mAttributes(attributes),
36       mArgs(args),
37       mPodBuffer(nullptr)
38 {
39     mShaderProgramHelper.setShader(gl::ShaderType::Compute,
40                                    mKernel.getProgram().getImpl<CLProgramVk>().getShaderModule());
41 }
42 
~CLKernelVk()43 CLKernelVk::~CLKernelVk()
44 {
45     mComputePipelineCache.destroy(mContext);
46     mShaderProgramHelper.destroy(mContext->getRenderer());
47 
48     if (mPodBuffer)
49     {
50         // mPodBuffer assignment will make newly created buffer
51         // return refcount of 2, so need to release by 1
52         mPodBuffer->release();
53     }
54 }
55 
init()56 angle::Result CLKernelVk::init()
57 {
58     const CLProgramVk::DeviceProgramData *deviceProgramData =
59         mProgram->getDeviceProgramData(mName.c_str());
60 
61     // Literal sampler handling
62     for (const ClspvLiteralSampler &literalSampler :
63          deviceProgramData->reflectionData.literalSamplers)
64     {
65         mDescriptorSetLayoutDescs[DescriptorSetIndex::LiteralSampler].addBinding(
66             literalSampler.binding, VK_DESCRIPTOR_TYPE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT,
67             nullptr);
68     }
69 
70     vk::DescriptorSetLayoutDesc &descriptorSetLayoutDesc =
71         mDescriptorSetLayoutDescs[DescriptorSetIndex::KernelArguments];
72     VkPushConstantRange pcRange = deviceProgramData->pushConstRange;
73     size_t podBufferSize        = 0;
74 
75     bool podFound = false;
76     for (const auto &arg : getArgs())
77     {
78         VkDescriptorType descType = VK_DESCRIPTOR_TYPE_MAX_ENUM;
79         switch (arg.type)
80         {
81             case NonSemanticClspvReflectionArgumentStorageBuffer:
82                 descType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
83                 break;
84             case NonSemanticClspvReflectionArgumentUniform:
85             case NonSemanticClspvReflectionArgumentPointerUniform:
86                 descType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
87                 break;
88             case NonSemanticClspvReflectionArgumentPodUniform:
89             case NonSemanticClspvReflectionArgumentPodStorageBuffer:
90             {
91                 uint32_t newPodBufferSize = arg.podStorageBufferOffset + arg.podStorageBufferSize;
92                 podBufferSize = newPodBufferSize > podBufferSize ? newPodBufferSize : podBufferSize;
93                 if (podFound)
94                 {
95                     continue;
96                 }
97                 descType = arg.type == NonSemanticClspvReflectionArgumentPodUniform
98                                ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
99                                : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
100                 podFound = true;
101                 break;
102             }
103             case NonSemanticClspvReflectionArgumentPodPushConstant:
104                 // Get existing push constant range and see if we need to update
105                 if (arg.pushConstOffset + arg.pushConstantSize > pcRange.offset + pcRange.size)
106                 {
107                     pcRange.size = arg.pushConstOffset + arg.pushConstantSize - pcRange.offset;
108                 }
109                 continue;
110             case NonSemanticClspvReflectionArgumentSampledImage:
111                 descType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
112                 break;
113             case NonSemanticClspvReflectionArgumentStorageImage:
114                 descType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
115                 break;
116             case NonSemanticClspvReflectionArgumentSampler:
117                 descType = VK_DESCRIPTOR_TYPE_SAMPLER;
118                 break;
119             case NonSemanticClspvReflectionArgumentStorageTexelBuffer:
120                 descType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
121                 break;
122             case NonSemanticClspvReflectionArgumentUniformTexelBuffer:
123                 descType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
124                 break;
125             default:
126                 continue;
127         }
128         if (descType != VK_DESCRIPTOR_TYPE_MAX_ENUM)
129         {
130             descriptorSetLayoutDesc.addBinding(arg.descriptorBinding, descType, 1,
131                                                VK_SHADER_STAGE_COMPUTE_BIT, nullptr);
132         }
133     }
134 
135     if (podBufferSize > 0)
136     {
137         mPodBuffer =
138             cl::MemoryPtr(cl::Buffer::Cast(this->mContext->getFrontendObject().createBuffer(
139                 nullptr, cl::MemFlags(CL_MEM_READ_ONLY), podBufferSize, nullptr)));
140     }
141 
142     if (usesPrintf())
143     {
144         mDescriptorSetLayoutDescs[DescriptorSetIndex::Printf].addBinding(
145             deviceProgramData->reflectionData.printfBufferStorage.binding,
146             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr);
147     }
148 
149     // Get pipeline layout from cache (creates if missed)
150     // A given kernel need not have resulted in use of all the descriptor sets. Unless the
151     // graphicsPipelineLibrary extension is supported, the pipeline layout need all the descriptor
152     // set layouts to be valide. So set them up in the order of their occurrence.
153     mPipelineLayoutDesc = {};
154     for (DescriptorSetIndex index : angle::AllEnums<DescriptorSetIndex>())
155     {
156         if (!mDescriptorSetLayoutDescs[index].empty())
157         {
158             mPipelineLayoutDesc.updateDescriptorSetLayout(index, mDescriptorSetLayoutDescs[index]);
159         }
160     }
161 
162     // push constant setup
163     // push constant size must be multiple of 4
164     pcRange.size = roundUpPow2(pcRange.size, 4u);
165     mPodArgumentPushConstants.resize(pcRange.size);
166 
167     // push constant offset must be multiple of 4, round down to ensure this
168     pcRange.offset = roundDownPow2(pcRange.offset, 4u);
169 
170     mPipelineLayoutDesc.updatePushConstantRange(pcRange.stageFlags, pcRange.offset, pcRange.size);
171 
172     // initialize the descriptor pools
173     // descriptor pools are setup as per their indices
174     return initializeDescriptorPools();
175 }
176 
setArg(cl_uint argIndex,size_t argSize,const void * argValue)177 angle::Result CLKernelVk::setArg(cl_uint argIndex, size_t argSize, const void *argValue)
178 {
179     auto &arg = mArgs.at(argIndex);
180     if (arg.used)
181     {
182         switch (arg.type)
183         {
184             case NonSemanticClspvReflectionArgumentPodPushConstant:
185                 ASSERT(mPodArgumentPushConstants.size() >=
186                        arg.pushConstantSize + arg.pushConstOffset);
187                 arg.handle     = &mPodArgumentPushConstants[arg.pushConstOffset];
188                 arg.handleSize = argSize;
189                 if (argSize > 0 && argValue != nullptr)
190                 {
191                     // Copy the contents since app is free to delete/reassign the contents after
192                     memcpy(arg.handle, argValue, arg.handleSize);
193                 }
194                 break;
195             case NonSemanticClspvReflectionArgumentPodUniform:
196             case NonSemanticClspvReflectionArgumentPodStorageBuffer:
197                 ASSERT(mPodBuffer->getSize() >= argSize + arg.podUniformOffset);
198                 if (argSize > 0 && argValue != nullptr)
199                 {
200                     ANGLE_TRY(mPodBuffer->getImpl<CLBufferVk>().copyFrom(
201                         argValue, arg.podStorageBufferOffset, argSize));
202                 }
203                 break;
204             case NonSemanticClspvReflectionArgumentWorkgroup:
205                 ASSERT(arg.workgroupSize != 0);
206                 mSpecConstants.push_back(
207                     KernelSpecConstant{.ID   = arg.workgroupSpecId,
208                                        .data = static_cast<uint32_t>(argSize / arg.workgroupSize)});
209                 break;
210             case NonSemanticClspvReflectionArgumentUniform:
211             case NonSemanticClspvReflectionArgumentStorageBuffer:
212             case NonSemanticClspvReflectionArgumentStorageImage:
213             case NonSemanticClspvReflectionArgumentSampledImage:
214             case NonSemanticClspvReflectionArgumentUniformTexelBuffer:
215             case NonSemanticClspvReflectionArgumentStorageTexelBuffer:
216                 ASSERT(argSize == sizeof(cl_mem *));
217                 arg.handle     = *static_cast<const cl_mem *>(argValue);
218                 arg.handleSize = argSize;
219                 break;
220             default:
221                 // Just store ptr and size (if we end up here)
222                 arg.handle     = const_cast<void *>(argValue);
223                 arg.handleSize = argSize;
224                 break;
225         }
226     }
227 
228     return angle::Result::Continue;
229 }
230 
createInfo(CLKernelImpl::Info * info) const231 angle::Result CLKernelVk::createInfo(CLKernelImpl::Info *info) const
232 {
233     info->functionName = mName;
234     info->attributes   = mAttributes;
235     info->numArgs      = static_cast<cl_uint>(mArgs.size());
236     for (const auto &arg : mArgs)
237     {
238         ArgInfo argInfo;
239         argInfo.name             = arg.info.name;
240         argInfo.typeName         = arg.info.typeName;
241         argInfo.accessQualifier  = arg.info.accessQualifier;
242         argInfo.addressQualifier = arg.info.addressQualifier;
243         argInfo.typeQualifier    = arg.info.typeQualifier;
244         info->args.push_back(std::move(argInfo));
245     }
246 
247     auto &ctx = mKernel.getProgram().getContext();
248     info->workGroups.resize(ctx.getDevices().size());
249     const CLProgramVk::DeviceProgramData *deviceProgramData = nullptr;
250     for (auto i = 0u; i < ctx.getDevices().size(); ++i)
251     {
252         auto &workGroup     = info->workGroups[i];
253         const auto deviceVk = &ctx.getDevices()[i]->getImpl<CLDeviceVk>();
254         deviceProgramData   = mProgram->getDeviceProgramData(ctx.getDevices()[i]->getNative());
255         if (deviceProgramData == nullptr)
256         {
257             continue;
258         }
259 
260         // TODO: http://anglebug.com/42267005
261         ANGLE_TRY(
262             deviceVk->getInfoSizeT(cl::DeviceInfo::MaxWorkGroupSize, &workGroup.workGroupSize));
263 
264         // TODO: http://anglebug.com/42267004
265         workGroup.privateMemSize = 0;
266         workGroup.localMemSize   = 0;
267 
268         workGroup.prefWorkGroupSizeMultiple = 16u;
269         workGroup.globalWorkSize            = {0, 0, 0};
270         if (deviceProgramData->reflectionData.kernelCompileWorkgroupSize.contains(mName))
271         {
272             workGroup.compileWorkGroupSize = {
273                 deviceProgramData->reflectionData.kernelCompileWorkgroupSize.at(mName)[0],
274                 deviceProgramData->reflectionData.kernelCompileWorkgroupSize.at(mName)[1],
275                 deviceProgramData->reflectionData.kernelCompileWorkgroupSize.at(mName)[2]};
276         }
277         else
278         {
279             workGroup.compileWorkGroupSize = {0, 0, 0};
280         }
281     }
282 
283     return angle::Result::Continue;
284 }
285 
initPipelineLayout()286 angle::Result CLKernelVk::initPipelineLayout()
287 {
288     PipelineLayoutCache *pipelineLayoutCache = mContext->getPipelineLayoutCache();
289     return pipelineLayoutCache->getPipelineLayout(mContext, mPipelineLayoutDesc,
290                                                   mDescriptorSetLayouts, &mPipelineLayout);
291 }
292 
getOrCreateComputePipeline(vk::PipelineCacheAccess * pipelineCache,const cl::NDRange & ndrange,const cl::Device & device,vk::PipelineHelper ** pipelineOut)293 angle::Result CLKernelVk::getOrCreateComputePipeline(vk::PipelineCacheAccess *pipelineCache,
294                                                      const cl::NDRange &ndrange,
295                                                      const cl::Device &device,
296                                                      vk::PipelineHelper **pipelineOut)
297 {
298     const CLProgramVk::DeviceProgramData *devProgramData =
299         getProgram()->getDeviceProgramData(device.getNative());
300     ASSERT(devProgramData != nullptr);
301 
302     // Populate program specialization constants (if any)
303     uint32_t constantDataOffset = 0;
304     std::vector<uint32_t> specConstantData;
305     std::vector<VkSpecializationMapEntry> mapEntries;
306     for (const auto specConstantUsed : devProgramData->reflectionData.specConstantsUsed)
307     {
308         switch (specConstantUsed)
309         {
310             case SpecConstantType::WorkDimension:
311                 specConstantData.push_back(ndrange.workDimensions);
312                 break;
313             case SpecConstantType::WorkgroupSizeX:
314                 specConstantData.push_back(ndrange.localWorkSize[0]);
315                 break;
316             case SpecConstantType::WorkgroupSizeY:
317                 specConstantData.push_back(ndrange.localWorkSize[1]);
318                 break;
319             case SpecConstantType::WorkgroupSizeZ:
320                 specConstantData.push_back(ndrange.localWorkSize[2]);
321                 break;
322             case SpecConstantType::GlobalOffsetX:
323                 specConstantData.push_back(ndrange.globalWorkOffset[0]);
324                 break;
325             case SpecConstantType::GlobalOffsetY:
326                 specConstantData.push_back(ndrange.globalWorkOffset[1]);
327                 break;
328             case SpecConstantType::GlobalOffsetZ:
329                 specConstantData.push_back(ndrange.globalWorkOffset[2]);
330                 break;
331             default:
332                 UNIMPLEMENTED();
333                 continue;
334         }
335         mapEntries.push_back(VkSpecializationMapEntry{
336             .constantID = devProgramData->reflectionData.specConstantIDs[specConstantUsed],
337             .offset     = constantDataOffset,
338             .size       = sizeof(uint32_t)});
339         constantDataOffset += sizeof(uint32_t);
340     }
341     // Populate kernel specialization constants (if any)
342     for (const auto &specConstant : mSpecConstants)
343     {
344         specConstantData.push_back(specConstant.data);
345         mapEntries.push_back(VkSpecializationMapEntry{
346             .constantID = specConstant.ID, .offset = constantDataOffset, .size = sizeof(uint32_t)});
347         constantDataOffset += sizeof(uint32_t);
348     }
349     VkSpecializationInfo computeSpecializationInfo{
350         .mapEntryCount = static_cast<uint32_t>(mapEntries.size()),
351         .pMapEntries   = mapEntries.data(),
352         .dataSize      = specConstantData.size() * sizeof(uint32_t),
353         .pData         = specConstantData.data(),
354     };
355 
356     // Now get or create (on compute pipeline cache miss) compute pipeline and return it
357     vk::ComputePipelineOptions options = vk::GetComputePipelineOptions(
358         vk::PipelineRobustness::NonRobust, vk::PipelineProtectedAccess::Unprotected);
359     return mShaderProgramHelper.getOrCreateComputePipeline(
360         mContext, &mComputePipelineCache, pipelineCache, getPipelineLayout(), options,
361         PipelineSource::Draw, pipelineOut, mName.c_str(), &computeSpecializationInfo);
362 }
363 
usesPrintf() const364 bool CLKernelVk::usesPrintf() const
365 {
366     return mProgram->getDeviceProgramData(mName.c_str())->getKernelFlags(mName) &
367            NonSemanticClspvReflectionMayUsePrintf;
368 }
369 
initializeDescriptorPools()370 angle::Result CLKernelVk::initializeDescriptorPools()
371 {
372     for (DescriptorSetIndex index : angle::AllEnums<DescriptorSetIndex>())
373     {
374         if (!mDescriptorSetLayoutDescs[index].empty())
375         {
376             ANGLE_TRY(mContext->getMetaDescriptorPool().bindCachedDescriptorPool(
377                 mContext, mDescriptorSetLayoutDescs[index], 1,
378                 mContext->getDescriptorSetLayoutCache(), &mDynamicDescriptorPools[index]));
379         }
380     }
381     return angle::Result::Continue;
382 }
383 
allocateDescriptorSet(DescriptorSetIndex index,angle::EnumIterator<DescriptorSetIndex> layoutIndex,vk::OutsideRenderPassCommandBufferHelper * computePassCommands)384 angle::Result CLKernelVk::allocateDescriptorSet(
385     DescriptorSetIndex index,
386     angle::EnumIterator<DescriptorSetIndex> layoutIndex,
387     vk::OutsideRenderPassCommandBufferHelper *computePassCommands)
388 {
389     if (mDescriptorSets[index] && mDescriptorSets[index]->valid())
390     {
391         if (mDescriptorSets[index]->usedByCommandBuffer(computePassCommands->getQueueSerial()))
392         {
393             mDescriptorSets[index].reset();
394         }
395         else
396         {
397             return angle::Result::Continue;
398         }
399     }
400 
401     if (mDynamicDescriptorPools[index]->valid())
402     {
403         ANGLE_TRY(mDynamicDescriptorPools[index]->allocateDescriptorSet(
404             mContext, *mDescriptorSetLayouts[*layoutIndex], &mDescriptorSets[index]));
405         computePassCommands->retainResource(mDescriptorSets[index].get());
406     }
407 
408     return angle::Result::Continue;
409 }
410 }  // namespace rx
411