• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // CLProgramVk.cpp: Implements the class methods for CLProgramVk.
7 
8 #include "libANGLE/renderer/vulkan/CLProgramVk.h"
9 #include "libANGLE/renderer/vulkan/CLContextVk.h"
10 #include "libANGLE/renderer/vulkan/CLDeviceVk.h"
11 
12 #include "libANGLE/CLContext.h"
13 #include "libANGLE/CLKernel.h"
14 #include "libANGLE/CLProgram.h"
15 #include "libANGLE/cl_utils.h"
16 
17 #include "common/system_utils.h"
18 
19 #include "clspv/Compiler.h"
20 
21 #include "spirv/unified1/NonSemanticClspvReflection.h"
22 #include "spirv/unified1/spirv.hpp"
23 
24 #include "spirv-tools/libspirv.hpp"
25 #include "spirv-tools/optimizer.hpp"
26 
27 #include "common/string_utils.h"
28 
29 namespace rx
30 {
31 
32 namespace
33 {
// Compile-time flag: true when this translation unit is built with ANGLE_ENABLE_ASSERTS defined.
#ifdef ANGLE_ENABLE_ASSERTS
constexpr bool kAngleDebug = true;
#else
constexpr bool kAngleDebug = false;
#endif
39 
40 // Used by SPIRV-Tools to parse reflection info
ParseReflection(CLProgramVk::SpvReflectionData & reflectionData,const spv_parsed_instruction_t & spvInstr)41 spv_result_t ParseReflection(CLProgramVk::SpvReflectionData &reflectionData,
42                              const spv_parsed_instruction_t &spvInstr)
43 {
44     // Parse spir-v opcodes
45     switch (spvInstr.opcode)
46     {
47         // --- Clspv specific parsing for below cases ---
48         case spv::OpExtInst:
49         {
50             switch (spvInstr.words[4])
51             {
52                 case NonSemanticClspvReflectionKernel:
53                 {
54                     // Extract kernel name and args - add to kernel args map
55                     std::string functionName = reflectionData.spvStrLookup[spvInstr.words[6]];
56                     uint32_t numArgs         = reflectionData.spvIntLookup[spvInstr.words[7]];
57                     reflectionData.kernelArgsMap[functionName] = CLKernelArguments();
58                     reflectionData.kernelArgsMap[functionName].resize(numArgs);
59 
60                     // Store kernel flags and attributes
61                     reflectionData.kernelFlags[functionName] =
62                         reflectionData.spvIntLookup[spvInstr.words[8]];
63                     reflectionData.kernelAttributes[functionName] =
64                         reflectionData.spvStrLookup[spvInstr.words[9]];
65 
66                     // Save kernel name to reflection table for later use/lookup in parser routine
67                     reflectionData.spvStrLookup[spvInstr.words[2]] = std::string(functionName);
68                     break;
69                 }
70                 case NonSemanticClspvReflectionArgumentInfo:
71                 {
72                     CLKernelVk::ArgInfo kernelArgInfo;
73                     kernelArgInfo.name = reflectionData.spvStrLookup[spvInstr.words[5]];
74                     // If instruction has more than 5 instruction operands (minus instruction
75                     // name/opcode), that means we have arg qualifiers. ArgumentInfo also counts as
76                     // an operand for OpExtInst. In below example, [ %e %f %g %h ] are the arg
77                     // qualifier operands.
78                     //
79                     // %a = OpExtInst %b %c ArgumentInfo %d [ %e %f %g %h ]
80                     if (spvInstr.num_operands > 5)
81                     {
82                         kernelArgInfo.typeName = reflectionData.spvStrLookup[spvInstr.words[6]];
83                         kernelArgInfo.addressQualifier =
84                             reflectionData.spvIntLookup[spvInstr.words[7]];
85                         kernelArgInfo.accessQualifier =
86                             reflectionData.spvIntLookup[spvInstr.words[8]];
87                         kernelArgInfo.typeQualifier =
88                             reflectionData.spvIntLookup[spvInstr.words[9]];
89                     }
90                     // Store kern arg for later lookup
91                     reflectionData.kernelArgInfos[spvInstr.words[2]] = std::move(kernelArgInfo);
92                     break;
93                 }
94                 case NonSemanticClspvReflectionArgumentPodUniform:
95                 case NonSemanticClspvReflectionArgumentPointerUniform:
96                 case NonSemanticClspvReflectionArgumentPodStorageBuffer:
97                 {
98                     CLKernelArgument kernelArg;
99                     if (spvInstr.num_operands == 11)
100                     {
101                         const CLKernelVk::ArgInfo &kernelArgInfo =
102                             reflectionData.kernelArgInfos[spvInstr.words[11]];
103                         kernelArg.info.name             = kernelArgInfo.name;
104                         kernelArg.info.typeName         = kernelArgInfo.typeName;
105                         kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier;
106                         kernelArg.info.accessQualifier  = kernelArgInfo.accessQualifier;
107                         kernelArg.info.typeQualifier    = kernelArgInfo.typeQualifier;
108                     }
109                     CLKernelArguments &kernelArgs =
110                         reflectionData
111                             .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]];
112                     kernelArg.type    = spvInstr.words[4];
113                     kernelArg.used    = true;
114                     kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
115                     kernelArg.op3     = reflectionData.spvIntLookup[spvInstr.words[7]];
116                     kernelArg.op4     = reflectionData.spvIntLookup[spvInstr.words[8]];
117                     kernelArg.op5     = reflectionData.spvIntLookup[spvInstr.words[9]];
118                     kernelArg.op6     = reflectionData.spvIntLookup[spvInstr.words[10]];
119 
120                     if (!kernelArgs.empty())
121                     {
122                         kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg);
123                     }
124                     break;
125                 }
126                 case NonSemanticClspvReflectionArgumentUniform:
127                 case NonSemanticClspvReflectionArgumentWorkgroup:
128                 case NonSemanticClspvReflectionArgumentStorageBuffer:
129                 case NonSemanticClspvReflectionArgumentPodPushConstant:
130                 case NonSemanticClspvReflectionArgumentPointerPushConstant:
131                 {
132                     CLKernelArgument kernelArg;
133                     if (spvInstr.num_operands == 9)
134                     {
135                         const CLKernelVk::ArgInfo &kernelArgInfo =
136                             reflectionData.kernelArgInfos[spvInstr.words[9]];
137                         kernelArg.info.name             = kernelArgInfo.name;
138                         kernelArg.info.typeName         = kernelArgInfo.typeName;
139                         kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier;
140                         kernelArg.info.accessQualifier  = kernelArgInfo.accessQualifier;
141                         kernelArg.info.typeQualifier    = kernelArgInfo.typeQualifier;
142                     }
143                     CLKernelArguments &kernelArgs =
144                         reflectionData
145                             .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]];
146                     kernelArg.type    = spvInstr.words[4];
147                     kernelArg.used    = true;
148                     kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
149                     kernelArg.op3     = reflectionData.spvIntLookup[spvInstr.words[7]];
150                     kernelArg.op4     = reflectionData.spvIntLookup[spvInstr.words[8]];
151                     kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg);
152                     break;
153                 }
154                 case NonSemanticClspvReflectionPushConstantGlobalSize:
155                 case NonSemanticClspvReflectionPushConstantGlobalOffset:
156                 case NonSemanticClspvReflectionPushConstantRegionOffset:
157                 {
158                     uint32_t offset = reflectionData.spvIntLookup[spvInstr.words[5]];
159                     uint32_t size   = reflectionData.spvIntLookup[spvInstr.words[6]];
160                     reflectionData.pushConstants[spvInstr.words[4]] = {
161                         .stageFlags = 0, .offset = offset, .size = size};
162                     break;
163                 }
164                 case NonSemanticClspvReflectionSpecConstantWorkgroupSize:
165                 {
166                     reflectionData.specConstantWorkgroupSizeIDs = {
167                         reflectionData.spvIntLookup[spvInstr.words[5]],
168                         reflectionData.spvIntLookup[spvInstr.words[6]],
169                         reflectionData.spvIntLookup[spvInstr.words[7]]};
170                     break;
171                 }
172                 case NonSemanticClspvReflectionPropertyRequiredWorkgroupSize:
173                 {
174                     reflectionData.kernelCompileWorkgroupSize
175                         [reflectionData.spvStrLookup[spvInstr.words[5]]] = {
176                         reflectionData.spvIntLookup[spvInstr.words[6]],
177                         reflectionData.spvIntLookup[spvInstr.words[7]],
178                         reflectionData.spvIntLookup[spvInstr.words[8]]};
179                     break;
180                 }
181                 default:
182                     break;
183             }
184             break;
185         }
186         // --- Regular SPIR-V opcode parsing for below cases ---
187         case spv::OpString:
188         {
189             reflectionData.spvStrLookup[spvInstr.words[1]] =
190                 reinterpret_cast<const char *>(&spvInstr.words[2]);
191             break;
192         }
193         case spv::OpConstant:
194         {
195             reflectionData.spvIntLookup[spvInstr.words[2]] = spvInstr.words[3];
196             break;
197         }
198         default:
199             break;
200     }
201     return SPV_SUCCESS;
202 }
203 
ProcessBuildOptions(const std::vector<std::string> & optionTokens,CLProgramVk::BuildType buildType)204 std::string ProcessBuildOptions(const std::vector<std::string> &optionTokens,
205                                 CLProgramVk::BuildType buildType)
206 {
207     std::string processedOptions;
208 
209     // Need to remove/replace options that are not 1-1 mapped to clspv
210     for (const std::string &optionToken : optionTokens)
211     {
212         if (optionToken == "-create-library" && buildType == CLProgramVk::BuildType::LINK)
213         {
214             processedOptions += " --output-format=bc";
215             continue;
216         }
217         processedOptions += " " + optionToken;
218     }
219 
220     switch (buildType)
221     {
222         case CLProgramVk::BuildType::COMPILE:
223             processedOptions += " --output-format=bc";
224             break;
225         case CLProgramVk::BuildType::LINK:
226             processedOptions += " -x ir";
227             break;
228         default:
229             break;
230     }
231 
232     // Other internal Clspv compiler flags that are needed/required
233     processedOptions += " --long-vector";
234 
235     return processedOptions;
236 }
237 
238 }  // namespace
239 
operator ()()240 void CLAsyncBuildTask::operator()()
241 {
242     ANGLE_TRACE_EVENT0("gpu.angle", "CLProgramVk::buildInternal (async)");
243     CLProgramVk::ScopedProgramCallback spc(mNotify);
244     if (!mProgramVk->buildInternal(mDevices, mOptions, mInternalOptions, mBuildType,
245                                    mLinkProgramsList))
246     {
247         ERR() << "Async build failed for program (" << mProgramVk
248               << ")! Check the build status or build log for details.";
249     }
250 }
251 
CLProgramVk(const cl::Program & program)252 CLProgramVk::CLProgramVk(const cl::Program &program)
253     : CLProgramImpl(program), mContext(&program.getContext().getImpl<CLContextVk>())
254 {}
255 
init()256 angle::Result CLProgramVk::init()
257 {
258     cl::DevicePtrs devices;
259     ANGLE_TRY(mContext->getDevices(&devices));
260 
261     // The devices associated with the program object are the devices associated with context
262     for (const cl::RefPointer<cl::Device> &device : devices)
263     {
264         mAssociatedDevicePrograms[device->getNative()] = DeviceProgramData{};
265     }
266 
267     return angle::Result::Continue;
268 }
269 
init(const size_t * lengths,const unsigned char ** binaries,cl_int * binaryStatus)270 angle::Result CLProgramVk::init(const size_t *lengths,
271                                 const unsigned char **binaries,
272                                 cl_int *binaryStatus)
273 {
274     // The devices associated with program come from device_list param from
275     // clCreateProgramWithBinary
276     for (const cl::DevicePtr &device : mProgram.getDevices())
277     {
278         const unsigned char *binaryHandle = *binaries++;
279         size_t binarySize                 = *lengths++;
280 
281         // Check for header
282         if (binarySize < sizeof(ProgramBinaryOutputHeader))
283         {
284             if (binaryStatus)
285             {
286                 *binaryStatus++ = CL_INVALID_BINARY;
287             }
288             ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
289         }
290         binarySize -= sizeof(ProgramBinaryOutputHeader);
291 
292         // Check for valid binary version from header
293         const ProgramBinaryOutputHeader *binaryHeader =
294             reinterpret_cast<const ProgramBinaryOutputHeader *>(binaryHandle);
295         if (binaryHeader == nullptr)
296         {
297             ERR() << "NULL binary header!";
298             if (binaryStatus)
299             {
300                 *binaryStatus++ = CL_INVALID_BINARY;
301             }
302             ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
303         }
304         else if (binaryHeader->headerVersion < kBinaryVersion)
305         {
306             ERR() << "Binary version not compatible with runtime!";
307             if (binaryStatus)
308             {
309                 *binaryStatus++ = CL_INVALID_BINARY;
310             }
311             ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
312         }
313         binaryHandle += sizeof(ProgramBinaryOutputHeader);
314 
315         // See what kind of binary we have (i.e. SPIR-V or LLVM Bitcode)
316         // https://llvm.org/docs/BitCodeFormat.html#llvm-ir-magic-number
317         // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_magic_number
318         constexpr uint32_t LLVM_BC_MAGIC = 0xDEC04342;
319         constexpr uint32_t SPIRV_MAGIC   = 0x07230203;
320         const uint32_t &firstWord        = reinterpret_cast<const uint32_t *>(binaryHandle)[0];
321         bool isBC                        = firstWord == LLVM_BC_MAGIC;
322         bool isSPV                       = firstWord == SPIRV_MAGIC;
323         if (!isBC && !isSPV)
324         {
325             ERR() << "Binary is neither SPIR-V nor LLVM Bitcode!";
326             if (binaryStatus)
327             {
328                 *binaryStatus++ = CL_INVALID_BINARY;
329             }
330             ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
331         }
332 
333         // Add device binary to program
334         DeviceProgramData deviceBinary;
335         deviceBinary.binaryType  = binaryHeader->binaryType;
336         deviceBinary.buildStatus = binaryHeader->buildStatus;
337         switch (deviceBinary.binaryType)
338         {
339             case CL_PROGRAM_BINARY_TYPE_EXECUTABLE:
340                 deviceBinary.binary.assign(binarySize / sizeof(uint32_t), 0);
341                 std::memcpy(deviceBinary.binary.data(), binaryHandle, binarySize);
342                 break;
343             case CL_PROGRAM_BINARY_TYPE_LIBRARY:
344             case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT:
345                 deviceBinary.IR.assign(binarySize, 0);
346                 std::memcpy(deviceBinary.IR.data(), binaryHandle, binarySize);
347                 break;
348             default:
349                 UNREACHABLE();
350                 ERR() << "Invalid binary type!";
351                 if (binaryStatus)
352                 {
353                     *binaryStatus++ = CL_INVALID_BINARY;
354                 }
355                 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
356         }
357         mAssociatedDevicePrograms[device->getNative()] = std::move(deviceBinary);
358         if (binaryStatus)
359         {
360             *binaryStatus++ = CL_SUCCESS;
361         }
362     }
363 
364     return angle::Result::Continue;
365 }
366 
~CLProgramVk()367 CLProgramVk::~CLProgramVk()
368 {
369     for (vk::BindingPointer<rx::vk::DynamicDescriptorPool> &pool : mDescriptorPools)
370     {
371         pool.reset();
372     }
373     mPoolBinding.reset();
374     mShader.get().destroy(mContext->getDevice());
375     mMetaDescriptorPool.destroy(mContext->getRenderer());
376     mDescSetLayoutCache.destroy(mContext->getRenderer());
377     mPipelineLayoutCache.destroy(mContext->getRenderer());
378 }
379 
build(const cl::DevicePtrs & devices,const char * options,cl::Program * notify)380 angle::Result CLProgramVk::build(const cl::DevicePtrs &devices,
381                                  const char *options,
382                                  cl::Program *notify)
383 {
384     BuildType buildType = !mProgram.getSource().empty() ? BuildType::BUILD : BuildType::BINARY;
385     const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices();
386 
387     if (notify)
388     {
389         std::shared_ptr<angle::WaitableEvent> asyncEvent =
390             getPlatform()->postMultiThreadWorkerTask(std::make_shared<CLAsyncBuildTask>(
391                 this, devicePtrs, std::string(options ? options : ""), "", buildType,
392                 LinkProgramsList{}, notify));
393         ASSERT(asyncEvent != nullptr);
394     }
395     else
396     {
397         if (!buildInternal(devicePtrs, std::string(options ? options : ""), "", buildType,
398                            LinkProgramsList{}))
399         {
400             ANGLE_CL_RETURN_ERROR(CL_BUILD_PROGRAM_FAILURE);
401         }
402     }
403     return angle::Result::Continue;
404 }
405 
compile(const cl::DevicePtrs & devices,const char * options,const cl::ProgramPtrs & inputHeaders,const char ** headerIncludeNames,cl::Program * notify)406 angle::Result CLProgramVk::compile(const cl::DevicePtrs &devices,
407                                    const char *options,
408                                    const cl::ProgramPtrs &inputHeaders,
409                                    const char **headerIncludeNames,
410                                    cl::Program *notify)
411 {
412     const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices();
413 
414     // Ensure OS temp dir is available
415     std::string internalCompileOpts;
416     Optional<std::string> tmpDir = angle::GetTempDirectory();
417     if (!tmpDir.valid())
418     {
419         ERR() << "Failed to open OS temp dir";
420         ANGLE_CL_RETURN_ERROR(CL_INVALID_OPERATION);
421     }
422     internalCompileOpts += inputHeaders.empty() ? "" : " -I" + tmpDir.value();
423 
424     // Dump input headers to OS temp directory
425     for (size_t i = 0; i < inputHeaders.size(); ++i)
426     {
427         const std::string &inputHeaderSrc =
428             inputHeaders.at(i)->getImpl<CLProgramVk>().mProgram.getSource();
429         std::string headerFilePath(angle::ConcatenatePath(tmpDir.value(), headerIncludeNames[i]));
430 
431         // Sanitize path so we can use "/" as universal path separator
432         angle::MakeForwardSlashThePathSeparator(headerFilePath);
433         size_t baseDirPos = headerFilePath.find_last_of("/");
434 
435         // Ensure parent dir(s) exists
436         if (!angle::CreateDirectories(headerFilePath.substr(0, baseDirPos)))
437         {
438             ERR() << "Failed to create output path(s) for header(s)!";
439             ANGLE_CL_RETURN_ERROR(CL_INVALID_OPERATION);
440         }
441         writeFile(headerFilePath.c_str(), inputHeaderSrc.data(), inputHeaderSrc.size());
442     }
443 
444     // Perform compile
445     if (notify)
446     {
447         std::shared_ptr<angle::WaitableEvent> asyncEvent =
448             mProgram.getContext().getPlatform().getMultiThreadPool()->postWorkerTask(
449                 std::make_shared<CLAsyncBuildTask>(
450                     this, devicePtrs, std::string(options ? options : ""), internalCompileOpts,
451                     BuildType::COMPILE, LinkProgramsList{}, notify));
452         ASSERT(asyncEvent != nullptr);
453     }
454     else
455     {
456         if (!buildInternal(devicePtrs, std::string(options ? options : ""), internalCompileOpts,
457                            BuildType::COMPILE, LinkProgramsList{}))
458         {
459             ANGLE_CL_RETURN_ERROR(CL_COMPILE_PROGRAM_FAILURE);
460         }
461     }
462 
463     return angle::Result::Continue;
464 }
465 
getInfo(cl::ProgramInfo name,size_t valueSize,void * value,size_t * valueSizeRet) const466 angle::Result CLProgramVk::getInfo(cl::ProgramInfo name,
467                                    size_t valueSize,
468                                    void *value,
469                                    size_t *valueSizeRet) const
470 {
471     cl_uint valUInt            = 0u;
472     void *valPointer           = nullptr;
473     const void *copyValue      = nullptr;
474     size_t copySize            = 0u;
475     unsigned char **outputBins = reinterpret_cast<unsigned char **>(value);
476     std::string kernelNamesList;
477     std::vector<size_t> vBinarySizes;
478 
479     switch (name)
480     {
481         case cl::ProgramInfo::NumKernels:
482             for (const auto &deviceProgram : mAssociatedDevicePrograms)
483             {
484                 valUInt += static_cast<decltype(valUInt)>(deviceProgram.second.numKernels());
485             }
486             copyValue = &valUInt;
487             copySize  = sizeof(valUInt);
488             break;
489         case cl::ProgramInfo::BinarySizes:
490         {
491             for (const auto &deviceProgram : mAssociatedDevicePrograms)
492             {
493                 vBinarySizes.push_back(
494                     sizeof(ProgramBinaryOutputHeader) +
495                     (deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
496                          ? deviceProgram.second.binary.size() * sizeof(uint32_t)
497                          : deviceProgram.second.IR.size()));
498             }
499             valPointer = vBinarySizes.data();
500             copyValue  = valPointer;
501             copySize   = vBinarySizes.size() * sizeof(size_t);
502             break;
503         }
504         case cl::ProgramInfo::Binaries:
505             for (const auto &deviceProgram : mAssociatedDevicePrograms)
506             {
507                 const void *bin =
508                     deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
509                         ? reinterpret_cast<const void *>(deviceProgram.second.binary.data())
510                         : reinterpret_cast<const void *>(deviceProgram.second.IR.data());
511                 size_t binSize =
512                     deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
513                         ? deviceProgram.second.binary.size() * sizeof(uint32_t)
514                         : deviceProgram.second.IR.size();
515                 ProgramBinaryOutputHeader header{.headerVersion = kBinaryVersion,
516                                                  .binaryType    = deviceProgram.second.binaryType,
517                                                  .buildStatus   = deviceProgram.second.buildStatus};
518 
519                 if (outputBins != nullptr)
520                 {
521                     if (*outputBins != nullptr)
522                     {
523                         std::memcpy(*outputBins, &header, sizeof(ProgramBinaryOutputHeader));
524                         std::memcpy((*outputBins) + sizeof(ProgramBinaryOutputHeader), bin,
525                                     binSize);
526                     }
527                     outputBins++;
528                 }
529 
530                 // Spec just wants pointer size here
531                 copySize += sizeof(unsigned char *);
532             }
533             // We already copied the (headers + binaries) over - nothing else left to copy
534             copyValue = nullptr;
535             break;
536         case cl::ProgramInfo::KernelNames:
537             for (const auto &deviceProgram : mAssociatedDevicePrograms)
538             {
539                 kernelNamesList = deviceProgram.second.getKernelNames();
540             }
541             valPointer = kernelNamesList.data();
542             copyValue  = valPointer;
543             copySize   = kernelNamesList.size() + 1;
544             break;
545         default:
546             UNREACHABLE();
547     }
548 
549     if ((value != nullptr) && (copyValue != nullptr))
550     {
551         std::memcpy(value, copyValue, copySize);
552     }
553 
554     if (valueSizeRet != nullptr)
555     {
556         *valueSizeRet = copySize;
557     }
558 
559     return angle::Result::Continue;
560 }
561 
getBuildInfo(const cl::Device & device,cl::ProgramBuildInfo name,size_t valueSize,void * value,size_t * valueSizeRet) const562 angle::Result CLProgramVk::getBuildInfo(const cl::Device &device,
563                                         cl::ProgramBuildInfo name,
564                                         size_t valueSize,
565                                         void *value,
566                                         size_t *valueSizeRet) const
567 {
568     cl_uint valUInt                            = 0;
569     cl_build_status valStatus                  = 0;
570     const void *copyValue                      = nullptr;
571     size_t copySize                            = 0;
572     const DeviceProgramData *deviceProgramData = getDeviceProgramData(device.getNative());
573 
574     switch (name)
575     {
576         case cl::ProgramBuildInfo::Status:
577             valStatus = deviceProgramData->buildStatus;
578             copyValue = &valStatus;
579             copySize  = sizeof(valStatus);
580             break;
581         case cl::ProgramBuildInfo::Log:
582             copyValue = deviceProgramData->buildLog.c_str();
583             copySize  = deviceProgramData->buildLog.size() + 1;
584             break;
585         case cl::ProgramBuildInfo::Options:
586             copyValue = mProgramOpts.c_str();
587             copySize  = mProgramOpts.size() + 1;
588             break;
589         case cl::ProgramBuildInfo::BinaryType:
590             valUInt   = deviceProgramData->binaryType;
591             copyValue = &valUInt;
592             copySize  = sizeof(valUInt);
593             break;
594         case cl::ProgramBuildInfo::GlobalVariableTotalSize:
595             // Returns 0 if device does not support program scope global variables.
596             valUInt   = 0;
597             copyValue = &valUInt;
598             copySize  = sizeof(valUInt);
599             break;
600         default:
601             UNREACHABLE();
602     }
603 
604     if ((value != nullptr) && (copyValue != nullptr))
605     {
606         memcpy(value, copyValue, std::min(valueSize, copySize));
607     }
608 
609     if (valueSizeRet != nullptr)
610     {
611         *valueSizeRet = copySize;
612     }
613 
614     return angle::Result::Continue;
615 }
616 
createKernel(const cl::Kernel & kernel,const char * name,CLKernelImpl::Ptr * kernelOut)617 angle::Result CLProgramVk::createKernel(const cl::Kernel &kernel,
618                                         const char *name,
619                                         CLKernelImpl::Ptr *kernelOut)
620 {
621     std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex);
622 
623     const auto devProgram = getDeviceProgramData(name);
624     ASSERT(devProgram != nullptr);
625 
626     // Create kernel
627     CLKernelArguments kernelArgs = devProgram->getKernelArguments(name);
628     std::string kernelAttributes = devProgram->getKernelAttributes(name);
629     std::string kernelName       = std::string(name ? name : "");
630     CLKernelVk::Ptr kernelImpl   = CLKernelVk::Ptr(
631         new (std::nothrow) CLKernelVk(kernel, kernelName, kernelAttributes, kernelArgs));
632     if (kernelImpl == nullptr)
633     {
634         ERR() << "Could not create kernel obj!";
635         ANGLE_CL_RETURN_ERROR(CL_OUT_OF_HOST_MEMORY);
636     }
637 
638     // Update push contant range and add layout bindings for arguments
639     vk::DescriptorSetLayoutDesc descriptorSetLayoutDesc;
640     VkPushConstantRange pcRange = devProgram->pushConstRange;
641     for (const auto &arg : kernelImpl->getArgs())
642     {
643         VkDescriptorType descType = VK_DESCRIPTOR_TYPE_MAX_ENUM;
644         switch (arg.type)
645         {
646             case NonSemanticClspvReflectionArgumentStorageBuffer:
647             case NonSemanticClspvReflectionArgumentPodStorageBuffer:
648                 descType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
649                 break;
650             case NonSemanticClspvReflectionArgumentUniform:
651             case NonSemanticClspvReflectionArgumentPodUniform:
652             case NonSemanticClspvReflectionArgumentPointerUniform:
653                 descType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
654                 break;
655             case NonSemanticClspvReflectionArgumentPodPushConstant:
656                 // Get existing push constant range and see if we need to update
657                 if (arg.pushConstOffset + arg.pushConstantSize > pcRange.offset + pcRange.size)
658                 {
659                     pcRange.size = arg.pushConstOffset + arg.pushConstantSize - pcRange.offset;
660                 }
661                 continue;
662             default:
663                 continue;
664         }
665         descriptorSetLayoutDesc.update(arg.descriptorBinding, descType, 1,
666                                        VK_SHADER_STAGE_COMPUTE_BIT, nullptr);
667     }
668 
669     // Get descriptor set layout from cache (creates if missed)
670     ANGLE_CL_IMPL_TRY_ERROR(
671         mDescSetLayoutCache.getDescriptorSetLayout(
672             mContext, descriptorSetLayoutDesc,
673             &kernelImpl->getDescriptorSetLayouts()[DescriptorSetIndex::ShaderResource]),
674         CL_INVALID_OPERATION);
675 
676     // Get pipeline layout from cache (creates if missed)
677     vk::PipelineLayoutDesc pipelineLayoutDesc;
678     pipelineLayoutDesc.updateDescriptorSetLayout(DescriptorSetIndex::ShaderResource,
679                                                  descriptorSetLayoutDesc);
680     pipelineLayoutDesc.updatePushConstantRange(pcRange.stageFlags, pcRange.offset, pcRange.size);
681     ANGLE_CL_IMPL_TRY_ERROR(mPipelineLayoutCache.getPipelineLayout(
682                                 mContext, pipelineLayoutDesc, kernelImpl->getDescriptorSetLayouts(),
683                                 &kernelImpl->getPipelineLayout()),
684                             CL_INVALID_OPERATION);
685 
686     // Setup descriptor pool
687     ANGLE_CL_IMPL_TRY_ERROR(mMetaDescriptorPool.bindCachedDescriptorPool(
688                                 mContext, descriptorSetLayoutDesc, 1, &mDescSetLayoutCache,
689                                 &mDescriptorPools[DescriptorSetIndex::ShaderResource]),
690                             CL_INVALID_OPERATION);
691 
692     *kernelOut = std::move(kernelImpl);
693 
694     return angle::Result::Continue;
695 }
696 
createKernels(cl_uint numKernels,CLKernelImpl::CreateFuncs & createFuncs,cl_uint * numKernelsRet)697 angle::Result CLProgramVk::createKernels(cl_uint numKernels,
698                                          CLKernelImpl::CreateFuncs &createFuncs,
699                                          cl_uint *numKernelsRet)
700 {
701     size_t numDevKernels = 0;
702     for (const auto &dev : mAssociatedDevicePrograms)
703     {
704         numDevKernels += dev.second.numKernels();
705     }
706     if (numKernelsRet != nullptr)
707     {
708         *numKernelsRet = static_cast<cl_uint>(numDevKernels);
709     }
710 
711     if (numKernels != 0)
712     {
713         for (const auto &dev : mAssociatedDevicePrograms)
714         {
715             for (const auto &kernArgMap : dev.second.getKernelArgsMap())
716             {
717                 createFuncs.emplace_back([this, &kernArgMap](const cl::Kernel &kern) {
718                     CLKernelImpl::Ptr implPtr = nullptr;
719                     ANGLE_CL_IMPL_TRY(this->createKernel(kern, kernArgMap.first.c_str(), &implPtr));
720                     return CLKernelImpl::Ptr(std::move(implPtr));
721                 });
722             }
723         }
724     }
725     return angle::Result::Continue;
726 }
727 
getDeviceProgramData(const _cl_device_id * device) const728 const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData(
729     const _cl_device_id *device) const
730 {
731     if (!mAssociatedDevicePrograms.contains(device))
732     {
733         WARN() << "Device (" << device << ") is not associated with program (" << this << ") !";
734         return nullptr;
735     }
736     return &mAssociatedDevicePrograms.at(device);
737 }
738 
getDeviceProgramData(const char * kernelName) const739 const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData(
740     const char *kernelName) const
741 {
742     for (const auto &deviceProgram : mAssociatedDevicePrograms)
743     {
744         if (deviceProgram.second.containsKernel(kernelName))
745         {
746             return &deviceProgram.second;
747         }
748     }
749     WARN() << "Kernel name (" << kernelName << ") is not associated with program (" << this
750            << ") !";
751     return nullptr;
752 }
753 
buildInternal(const cl::DevicePtrs & devices,std::string options,std::string internalOptions,BuildType buildType,const LinkProgramsList & LinkProgramsList)754 bool CLProgramVk::buildInternal(const cl::DevicePtrs &devices,
755                                 std::string options,
756                                 std::string internalOptions,
757                                 BuildType buildType,
758                                 const LinkProgramsList &LinkProgramsList)
759 {
760     std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex);
761 
762     // Cache original options string
763     mProgramOpts = options;
764 
765     // Process options and append any other internal (required) options for clspv
766     std::vector<std::string> optionTokens;
767     angle::SplitStringAlongWhitespace(options + " " + internalOptions, &optionTokens);
768     const bool createLibrary     = std::find(optionTokens.begin(), optionTokens.end(),
769                                              "-create-library") != optionTokens.end();
770     std::string processedOptions = ProcessBuildOptions(optionTokens, buildType);
771 
772     // Build for each associated device
773     for (size_t i = 0; i < devices.size(); ++i)
774     {
775         const cl::RefPointer<cl::Device> &device = devices.at(i);
776         DeviceProgramData &deviceProgramData     = mAssociatedDevicePrograms[device->getNative()];
777         deviceProgramData.buildStatus            = CL_BUILD_IN_PROGRESS;
778 
779         if (buildType != BuildType::BINARY)
780         {
781             // Invoke clspv
782             switch (buildType)
783             {
784                 case BuildType::BUILD:
785                 case BuildType::COMPILE:
786                 {
787                     ScopedClspvContext clspvCtx;
788                     const char *clSrc   = mProgram.getSource().c_str();
789                     ClspvError clspvRet = clspvCompileFromSourcesString(
790                         1, NULL, static_cast<const char **>(&clSrc), processedOptions.c_str(),
791                         &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog);
792                     deviceProgramData.buildLog =
793                         clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : "";
794                     if (clspvRet != CLSPV_SUCCESS)
795                     {
796                         ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!";
797                         deviceProgramData.buildStatus = CL_BUILD_ERROR;
798                         return false;
799                     }
800 
801                     if (buildType == BuildType::COMPILE)
802                     {
803                         deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0);
804                         std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin,
805                                     clspvCtx.mOutputBinSize);
806                         deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
807                     }
808                     else
809                     {
810                         deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t),
811                                                         0);
812                         std::memcpy(deviceProgramData.binary.data(), clspvCtx.mOutputBin,
813                                     clspvCtx.mOutputBinSize);
814                         deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
815                     }
816                     break;
817                 }
818                 case BuildType::LINK:
819                 {
820                     ScopedClspvContext clspvCtx;
821                     std::vector<size_t> vSizes;
822                     std::vector<const char *> vBins;
823                     const LinkPrograms &linkPrograms = LinkProgramsList.at(i);
824                     for (const CLProgramVk::DeviceProgramData *linkProgramData : linkPrograms)
825                     {
826                         vSizes.push_back(linkProgramData->IR.size());
827                         vBins.push_back(linkProgramData->IR.data());
828                     }
829                     ClspvError clspvRet = clspvCompileFromSourcesString(
830                         linkPrograms.size(), vSizes.data(), vBins.data(), processedOptions.c_str(),
831                         &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog);
832                     deviceProgramData.buildLog =
833                         clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : "";
834                     if (clspvRet != CLSPV_SUCCESS)
835                     {
836                         ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!";
837                         deviceProgramData.buildStatus = CL_BUILD_ERROR;
838                         return false;
839                     }
840 
841                     if (createLibrary)
842                     {
843                         deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0);
844                         std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin,
845                                     clspvCtx.mOutputBinSize);
846                         deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
847                     }
848                     else
849                     {
850                         deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t),
851                                                         0);
852                         std::memcpy(deviceProgramData.binary.data(),
853                                     reinterpret_cast<char *>(clspvCtx.mOutputBin),
854                                     clspvCtx.mOutputBinSize);
855                         deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
856                     }
857                     break;
858                 }
859                 default:
860                     UNREACHABLE();
861                     return false;
862             }
863         }
864 
865         // Extract reflection info from spv binary and populate reflection data, as well as create
866         // the shader module
867         if (deviceProgramData.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE)
868         {
869             spvtools::SpirvTools spvTool(SPV_ENV_UNIVERSAL_1_5);
870             bool parseRet = spvTool.Parse(
871                 deviceProgramData.binary,
872                 [](const spv_endianness_t endianess, const spv_parsed_header_t &instruction) {
873                     return SPV_SUCCESS;
874                 },
875                 [&deviceProgramData](const spv_parsed_instruction_t &instruction) {
876                     return ParseReflection(deviceProgramData.reflectionData, instruction);
877                 });
878             if (!parseRet)
879             {
880                 ERR() << "Failed to parse reflection info from SPIR-V!";
881                 deviceProgramData.buildStatus = CL_BUILD_ERROR;
882                 return false;
883             }
884 
885             if (mShader.get().valid())
886             {
887                 // User is recompiling program, we need to recreate the shader module
888                 mShader.get().destroy(mContext->getDevice());
889             }
890             // Strip SPIR-V binary if Vk implementation does not support non-semantic info
891             angle::spirv::Blob spvBlob =
892                 !mContext->getRenderer()->getFeatures().supportsShaderNonSemanticInfo.enabled
893                     ? stripReflection(&deviceProgramData)
894                     : deviceProgramData.binary;
895             ASSERT(!spvBlob.empty());
896             if (IsError(vk::InitShaderModule(mContext, &mShader.get(), spvBlob.data(),
897                                              spvBlob.size() * sizeof(uint32_t))))
898             {
899                 ERR() << "Failed to init Vulkan Shader Module!";
900                 deviceProgramData.buildStatus = CL_BUILD_ERROR;
901                 return false;
902             }
903 
904             // Setup inital push constant range
905             uint32_t pushConstantMinOffet = UINT32_MAX, pushConstantMaxOffset = 0,
906                      pushConstantMaxSize = 0;
907             for (const auto &pushConstant : deviceProgramData.reflectionData.pushConstants)
908             {
909                 pushConstantMinOffet = pushConstant.second.offset < pushConstantMinOffet
910                                            ? pushConstant.second.offset
911                                            : pushConstantMinOffet;
912                 if (pushConstant.second.offset >= pushConstantMaxOffset)
913                 {
914                     pushConstantMaxOffset = pushConstant.second.offset;
915                     pushConstantMaxSize   = pushConstant.second.size;
916                 }
917             }
918             deviceProgramData.pushConstRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
919             deviceProgramData.pushConstRange.offset =
920                 pushConstantMinOffet == UINT32_MAX ? 0 : pushConstantMinOffet;
921             deviceProgramData.pushConstRange.size = pushConstantMaxOffset + pushConstantMaxSize;
922 
923             if (kAngleDebug)
924             {
925                 if (mContext->getFeatures().clDumpVkSpirv.enabled)
926                 {
927                     angle::spirv::Print(deviceProgramData.binary);
928                 }
929             }
930         }
931         deviceProgramData.buildStatus = CL_BUILD_SUCCESS;
932     }
933     return true;
934 }
935 
stripReflection(const DeviceProgramData * deviceProgramData)936 angle::spirv::Blob CLProgramVk::stripReflection(const DeviceProgramData *deviceProgramData)
937 {
938     angle::spirv::Blob binaryStripped;
939     spvtools::Optimizer opt(SPV_ENV_UNIVERSAL_1_5);
940     opt.RegisterPass(spvtools::CreateStripReflectInfoPass());
941     spvtools::OptimizerOptions optOptions;
942     optOptions.set_run_validator(false);
943     if (!opt.Run(deviceProgramData->binary.data(), deviceProgramData->binary.size(),
944                  &binaryStripped, optOptions))
945     {
946         ERR() << "Could not strip reflection data from binary!";
947     }
948     return binaryStripped;
949 }
950 
allocateDescriptorSet(const vk::DescriptorSetLayout & descriptorSetLayout,VkDescriptorSet * descriptorSetOut)951 angle::Result CLProgramVk::allocateDescriptorSet(const vk::DescriptorSetLayout &descriptorSetLayout,
952                                                  VkDescriptorSet *descriptorSetOut)
953 {
954     if (mDescriptorPools[DescriptorSetIndex::ShaderResource].get().valid())
955     {
956         ANGLE_CL_IMPL_TRY_ERROR(
957             mDescriptorPools[DescriptorSetIndex::ShaderResource].get().allocateDescriptorSet(
958                 mContext, descriptorSetLayout, &mPoolBinding, descriptorSetOut),
959             CL_INVALID_OPERATION);
960     }
961     return angle::Result::Continue;
962 }
963 
964 }  // namespace rx
965