1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // CLProgramVk.cpp: Implements the class methods for CLProgramVk.
7
8 #include "libANGLE/renderer/vulkan/CLProgramVk.h"
9 #include "libANGLE/renderer/vulkan/CLContextVk.h"
10 #include "libANGLE/renderer/vulkan/CLDeviceVk.h"
11
12 #include "libANGLE/CLContext.h"
13 #include "libANGLE/CLKernel.h"
14 #include "libANGLE/CLProgram.h"
15 #include "libANGLE/cl_utils.h"
16
17 #include "common/system_utils.h"
18
19 #include "clspv/Compiler.h"
20
21 #include "spirv/unified1/NonSemanticClspvReflection.h"
22 #include "spirv/unified1/spirv.hpp"
23
24 #include "spirv-tools/libspirv.hpp"
25 #include "spirv-tools/optimizer.hpp"
26
27 #include "common/string_utils.h"
28
29 namespace rx
30 {
31
32 namespace
33 {
// Compile-time switch that is true only when ANGLE asserts are compiled in.
constexpr bool kAngleDebug =
#if defined(ANGLE_ENABLE_ASSERTS)
    true;
#else
    false;
#endif
39
40 // Used by SPIRV-Tools to parse reflection info
ParseReflection(CLProgramVk::SpvReflectionData & reflectionData,const spv_parsed_instruction_t & spvInstr)41 spv_result_t ParseReflection(CLProgramVk::SpvReflectionData &reflectionData,
42 const spv_parsed_instruction_t &spvInstr)
43 {
44 // Parse spir-v opcodes
45 switch (spvInstr.opcode)
46 {
47 // --- Clspv specific parsing for below cases ---
48 case spv::OpExtInst:
49 {
50 switch (spvInstr.words[4])
51 {
52 case NonSemanticClspvReflectionKernel:
53 {
54 // Extract kernel name and args - add to kernel args map
55 std::string functionName = reflectionData.spvStrLookup[spvInstr.words[6]];
56 uint32_t numArgs = reflectionData.spvIntLookup[spvInstr.words[7]];
57 reflectionData.kernelArgsMap[functionName] = CLKernelArguments();
58 reflectionData.kernelArgsMap[functionName].resize(numArgs);
59
60 // Store kernel flags and attributes
61 reflectionData.kernelFlags[functionName] =
62 reflectionData.spvIntLookup[spvInstr.words[8]];
63 reflectionData.kernelAttributes[functionName] =
64 reflectionData.spvStrLookup[spvInstr.words[9]];
65
66 // Save kernel name to reflection table for later use/lookup in parser routine
67 reflectionData.spvStrLookup[spvInstr.words[2]] = std::string(functionName);
68 break;
69 }
70 case NonSemanticClspvReflectionArgumentInfo:
71 {
72 CLKernelVk::ArgInfo kernelArgInfo;
73 kernelArgInfo.name = reflectionData.spvStrLookup[spvInstr.words[5]];
74 // If instruction has more than 5 instruction operands (minus instruction
75 // name/opcode), that means we have arg qualifiers. ArgumentInfo also counts as
76 // an operand for OpExtInst. In below example, [ %e %f %g %h ] are the arg
77 // qualifier operands.
78 //
79 // %a = OpExtInst %b %c ArgumentInfo %d [ %e %f %g %h ]
80 if (spvInstr.num_operands > 5)
81 {
82 kernelArgInfo.typeName = reflectionData.spvStrLookup[spvInstr.words[6]];
83 kernelArgInfo.addressQualifier =
84 reflectionData.spvIntLookup[spvInstr.words[7]];
85 kernelArgInfo.accessQualifier =
86 reflectionData.spvIntLookup[spvInstr.words[8]];
87 kernelArgInfo.typeQualifier =
88 reflectionData.spvIntLookup[spvInstr.words[9]];
89 }
90 // Store kern arg for later lookup
91 reflectionData.kernelArgInfos[spvInstr.words[2]] = std::move(kernelArgInfo);
92 break;
93 }
94 case NonSemanticClspvReflectionArgumentPodUniform:
95 case NonSemanticClspvReflectionArgumentPointerUniform:
96 case NonSemanticClspvReflectionArgumentPodStorageBuffer:
97 {
98 CLKernelArgument kernelArg;
99 if (spvInstr.num_operands == 11)
100 {
101 const CLKernelVk::ArgInfo &kernelArgInfo =
102 reflectionData.kernelArgInfos[spvInstr.words[11]];
103 kernelArg.info.name = kernelArgInfo.name;
104 kernelArg.info.typeName = kernelArgInfo.typeName;
105 kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier;
106 kernelArg.info.accessQualifier = kernelArgInfo.accessQualifier;
107 kernelArg.info.typeQualifier = kernelArgInfo.typeQualifier;
108 }
109 CLKernelArguments &kernelArgs =
110 reflectionData
111 .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]];
112 kernelArg.type = spvInstr.words[4];
113 kernelArg.used = true;
114 kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
115 kernelArg.op3 = reflectionData.spvIntLookup[spvInstr.words[7]];
116 kernelArg.op4 = reflectionData.spvIntLookup[spvInstr.words[8]];
117 kernelArg.op5 = reflectionData.spvIntLookup[spvInstr.words[9]];
118 kernelArg.op6 = reflectionData.spvIntLookup[spvInstr.words[10]];
119
120 if (!kernelArgs.empty())
121 {
122 kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg);
123 }
124 break;
125 }
126 case NonSemanticClspvReflectionArgumentUniform:
127 case NonSemanticClspvReflectionArgumentWorkgroup:
128 case NonSemanticClspvReflectionArgumentStorageBuffer:
129 case NonSemanticClspvReflectionArgumentPodPushConstant:
130 case NonSemanticClspvReflectionArgumentPointerPushConstant:
131 {
132 CLKernelArgument kernelArg;
133 if (spvInstr.num_operands == 9)
134 {
135 const CLKernelVk::ArgInfo &kernelArgInfo =
136 reflectionData.kernelArgInfos[spvInstr.words[9]];
137 kernelArg.info.name = kernelArgInfo.name;
138 kernelArg.info.typeName = kernelArgInfo.typeName;
139 kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier;
140 kernelArg.info.accessQualifier = kernelArgInfo.accessQualifier;
141 kernelArg.info.typeQualifier = kernelArgInfo.typeQualifier;
142 }
143 CLKernelArguments &kernelArgs =
144 reflectionData
145 .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]];
146 kernelArg.type = spvInstr.words[4];
147 kernelArg.used = true;
148 kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
149 kernelArg.op3 = reflectionData.spvIntLookup[spvInstr.words[7]];
150 kernelArg.op4 = reflectionData.spvIntLookup[spvInstr.words[8]];
151 kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg);
152 break;
153 }
154 case NonSemanticClspvReflectionPushConstantGlobalSize:
155 case NonSemanticClspvReflectionPushConstantGlobalOffset:
156 case NonSemanticClspvReflectionPushConstantRegionOffset:
157 {
158 uint32_t offset = reflectionData.spvIntLookup[spvInstr.words[5]];
159 uint32_t size = reflectionData.spvIntLookup[spvInstr.words[6]];
160 reflectionData.pushConstants[spvInstr.words[4]] = {
161 .stageFlags = 0, .offset = offset, .size = size};
162 break;
163 }
164 case NonSemanticClspvReflectionSpecConstantWorkgroupSize:
165 {
166 reflectionData.specConstantWorkgroupSizeIDs = {
167 reflectionData.spvIntLookup[spvInstr.words[5]],
168 reflectionData.spvIntLookup[spvInstr.words[6]],
169 reflectionData.spvIntLookup[spvInstr.words[7]]};
170 break;
171 }
172 case NonSemanticClspvReflectionPropertyRequiredWorkgroupSize:
173 {
174 reflectionData.kernelCompileWorkgroupSize
175 [reflectionData.spvStrLookup[spvInstr.words[5]]] = {
176 reflectionData.spvIntLookup[spvInstr.words[6]],
177 reflectionData.spvIntLookup[spvInstr.words[7]],
178 reflectionData.spvIntLookup[spvInstr.words[8]]};
179 break;
180 }
181 default:
182 break;
183 }
184 break;
185 }
186 // --- Regular SPIR-V opcode parsing for below cases ---
187 case spv::OpString:
188 {
189 reflectionData.spvStrLookup[spvInstr.words[1]] =
190 reinterpret_cast<const char *>(&spvInstr.words[2]);
191 break;
192 }
193 case spv::OpConstant:
194 {
195 reflectionData.spvIntLookup[spvInstr.words[2]] = spvInstr.words[3];
196 break;
197 }
198 default:
199 break;
200 }
201 return SPV_SUCCESS;
202 }
203
ProcessBuildOptions(const std::vector<std::string> & optionTokens,CLProgramVk::BuildType buildType)204 std::string ProcessBuildOptions(const std::vector<std::string> &optionTokens,
205 CLProgramVk::BuildType buildType)
206 {
207 std::string processedOptions;
208
209 // Need to remove/replace options that are not 1-1 mapped to clspv
210 for (const std::string &optionToken : optionTokens)
211 {
212 if (optionToken == "-create-library" && buildType == CLProgramVk::BuildType::LINK)
213 {
214 processedOptions += " --output-format=bc";
215 continue;
216 }
217 processedOptions += " " + optionToken;
218 }
219
220 switch (buildType)
221 {
222 case CLProgramVk::BuildType::COMPILE:
223 processedOptions += " --output-format=bc";
224 break;
225 case CLProgramVk::BuildType::LINK:
226 processedOptions += " -x ir";
227 break;
228 default:
229 break;
230 }
231
232 // Other internal Clspv compiler flags that are needed/required
233 processedOptions += " --long-vector";
234
235 return processedOptions;
236 }
237
238 } // namespace
239
operator ()()240 void CLAsyncBuildTask::operator()()
241 {
242 ANGLE_TRACE_EVENT0("gpu.angle", "CLProgramVk::buildInternal (async)");
243 CLProgramVk::ScopedProgramCallback spc(mNotify);
244 if (!mProgramVk->buildInternal(mDevices, mOptions, mInternalOptions, mBuildType,
245 mLinkProgramsList))
246 {
247 ERR() << "Async build failed for program (" << mProgramVk
248 << ")! Check the build status or build log for details.";
249 }
250 }
251
CLProgramVk(const cl::Program & program)252 CLProgramVk::CLProgramVk(const cl::Program &program)
253 : CLProgramImpl(program), mContext(&program.getContext().getImpl<CLContextVk>())
254 {}
255
init()256 angle::Result CLProgramVk::init()
257 {
258 cl::DevicePtrs devices;
259 ANGLE_TRY(mContext->getDevices(&devices));
260
261 // The devices associated with the program object are the devices associated with context
262 for (const cl::RefPointer<cl::Device> &device : devices)
263 {
264 mAssociatedDevicePrograms[device->getNative()] = DeviceProgramData{};
265 }
266
267 return angle::Result::Continue;
268 }
269
init(const size_t * lengths,const unsigned char ** binaries,cl_int * binaryStatus)270 angle::Result CLProgramVk::init(const size_t *lengths,
271 const unsigned char **binaries,
272 cl_int *binaryStatus)
273 {
274 // The devices associated with program come from device_list param from
275 // clCreateProgramWithBinary
276 for (const cl::DevicePtr &device : mProgram.getDevices())
277 {
278 const unsigned char *binaryHandle = *binaries++;
279 size_t binarySize = *lengths++;
280
281 // Check for header
282 if (binarySize < sizeof(ProgramBinaryOutputHeader))
283 {
284 if (binaryStatus)
285 {
286 *binaryStatus++ = CL_INVALID_BINARY;
287 }
288 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
289 }
290 binarySize -= sizeof(ProgramBinaryOutputHeader);
291
292 // Check for valid binary version from header
293 const ProgramBinaryOutputHeader *binaryHeader =
294 reinterpret_cast<const ProgramBinaryOutputHeader *>(binaryHandle);
295 if (binaryHeader == nullptr)
296 {
297 ERR() << "NULL binary header!";
298 if (binaryStatus)
299 {
300 *binaryStatus++ = CL_INVALID_BINARY;
301 }
302 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
303 }
304 else if (binaryHeader->headerVersion < kBinaryVersion)
305 {
306 ERR() << "Binary version not compatible with runtime!";
307 if (binaryStatus)
308 {
309 *binaryStatus++ = CL_INVALID_BINARY;
310 }
311 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
312 }
313 binaryHandle += sizeof(ProgramBinaryOutputHeader);
314
315 // See what kind of binary we have (i.e. SPIR-V or LLVM Bitcode)
316 // https://llvm.org/docs/BitCodeFormat.html#llvm-ir-magic-number
317 // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_magic_number
318 constexpr uint32_t LLVM_BC_MAGIC = 0xDEC04342;
319 constexpr uint32_t SPIRV_MAGIC = 0x07230203;
320 const uint32_t &firstWord = reinterpret_cast<const uint32_t *>(binaryHandle)[0];
321 bool isBC = firstWord == LLVM_BC_MAGIC;
322 bool isSPV = firstWord == SPIRV_MAGIC;
323 if (!isBC && !isSPV)
324 {
325 ERR() << "Binary is neither SPIR-V nor LLVM Bitcode!";
326 if (binaryStatus)
327 {
328 *binaryStatus++ = CL_INVALID_BINARY;
329 }
330 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
331 }
332
333 // Add device binary to program
334 DeviceProgramData deviceBinary;
335 deviceBinary.binaryType = binaryHeader->binaryType;
336 deviceBinary.buildStatus = binaryHeader->buildStatus;
337 switch (deviceBinary.binaryType)
338 {
339 case CL_PROGRAM_BINARY_TYPE_EXECUTABLE:
340 deviceBinary.binary.assign(binarySize / sizeof(uint32_t), 0);
341 std::memcpy(deviceBinary.binary.data(), binaryHandle, binarySize);
342 break;
343 case CL_PROGRAM_BINARY_TYPE_LIBRARY:
344 case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT:
345 deviceBinary.IR.assign(binarySize, 0);
346 std::memcpy(deviceBinary.IR.data(), binaryHandle, binarySize);
347 break;
348 default:
349 UNREACHABLE();
350 ERR() << "Invalid binary type!";
351 if (binaryStatus)
352 {
353 *binaryStatus++ = CL_INVALID_BINARY;
354 }
355 ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
356 }
357 mAssociatedDevicePrograms[device->getNative()] = std::move(deviceBinary);
358 if (binaryStatus)
359 {
360 *binaryStatus++ = CL_SUCCESS;
361 }
362 }
363
364 return angle::Result::Continue;
365 }
366
~CLProgramVk()367 CLProgramVk::~CLProgramVk()
368 {
369 for (vk::BindingPointer<rx::vk::DynamicDescriptorPool> &pool : mDescriptorPools)
370 {
371 pool.reset();
372 }
373 mPoolBinding.reset();
374 mShader.get().destroy(mContext->getDevice());
375 mMetaDescriptorPool.destroy(mContext->getRenderer());
376 mDescSetLayoutCache.destroy(mContext->getRenderer());
377 mPipelineLayoutCache.destroy(mContext->getRenderer());
378 }
379
build(const cl::DevicePtrs & devices,const char * options,cl::Program * notify)380 angle::Result CLProgramVk::build(const cl::DevicePtrs &devices,
381 const char *options,
382 cl::Program *notify)
383 {
384 BuildType buildType = !mProgram.getSource().empty() ? BuildType::BUILD : BuildType::BINARY;
385 const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices();
386
387 if (notify)
388 {
389 std::shared_ptr<angle::WaitableEvent> asyncEvent =
390 getPlatform()->postMultiThreadWorkerTask(std::make_shared<CLAsyncBuildTask>(
391 this, devicePtrs, std::string(options ? options : ""), "", buildType,
392 LinkProgramsList{}, notify));
393 ASSERT(asyncEvent != nullptr);
394 }
395 else
396 {
397 if (!buildInternal(devicePtrs, std::string(options ? options : ""), "", buildType,
398 LinkProgramsList{}))
399 {
400 ANGLE_CL_RETURN_ERROR(CL_BUILD_PROGRAM_FAILURE);
401 }
402 }
403 return angle::Result::Continue;
404 }
405
compile(const cl::DevicePtrs & devices,const char * options,const cl::ProgramPtrs & inputHeaders,const char ** headerIncludeNames,cl::Program * notify)406 angle::Result CLProgramVk::compile(const cl::DevicePtrs &devices,
407 const char *options,
408 const cl::ProgramPtrs &inputHeaders,
409 const char **headerIncludeNames,
410 cl::Program *notify)
411 {
412 const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices();
413
414 // Ensure OS temp dir is available
415 std::string internalCompileOpts;
416 Optional<std::string> tmpDir = angle::GetTempDirectory();
417 if (!tmpDir.valid())
418 {
419 ERR() << "Failed to open OS temp dir";
420 ANGLE_CL_RETURN_ERROR(CL_INVALID_OPERATION);
421 }
422 internalCompileOpts += inputHeaders.empty() ? "" : " -I" + tmpDir.value();
423
424 // Dump input headers to OS temp directory
425 for (size_t i = 0; i < inputHeaders.size(); ++i)
426 {
427 const std::string &inputHeaderSrc =
428 inputHeaders.at(i)->getImpl<CLProgramVk>().mProgram.getSource();
429 std::string headerFilePath(angle::ConcatenatePath(tmpDir.value(), headerIncludeNames[i]));
430
431 // Sanitize path so we can use "/" as universal path separator
432 angle::MakeForwardSlashThePathSeparator(headerFilePath);
433 size_t baseDirPos = headerFilePath.find_last_of("/");
434
435 // Ensure parent dir(s) exists
436 if (!angle::CreateDirectories(headerFilePath.substr(0, baseDirPos)))
437 {
438 ERR() << "Failed to create output path(s) for header(s)!";
439 ANGLE_CL_RETURN_ERROR(CL_INVALID_OPERATION);
440 }
441 writeFile(headerFilePath.c_str(), inputHeaderSrc.data(), inputHeaderSrc.size());
442 }
443
444 // Perform compile
445 if (notify)
446 {
447 std::shared_ptr<angle::WaitableEvent> asyncEvent =
448 mProgram.getContext().getPlatform().getMultiThreadPool()->postWorkerTask(
449 std::make_shared<CLAsyncBuildTask>(
450 this, devicePtrs, std::string(options ? options : ""), internalCompileOpts,
451 BuildType::COMPILE, LinkProgramsList{}, notify));
452 ASSERT(asyncEvent != nullptr);
453 }
454 else
455 {
456 if (!buildInternal(devicePtrs, std::string(options ? options : ""), internalCompileOpts,
457 BuildType::COMPILE, LinkProgramsList{}))
458 {
459 ANGLE_CL_RETURN_ERROR(CL_COMPILE_PROGRAM_FAILURE);
460 }
461 }
462
463 return angle::Result::Continue;
464 }
465
getInfo(cl::ProgramInfo name,size_t valueSize,void * value,size_t * valueSizeRet) const466 angle::Result CLProgramVk::getInfo(cl::ProgramInfo name,
467 size_t valueSize,
468 void *value,
469 size_t *valueSizeRet) const
470 {
471 cl_uint valUInt = 0u;
472 void *valPointer = nullptr;
473 const void *copyValue = nullptr;
474 size_t copySize = 0u;
475 unsigned char **outputBins = reinterpret_cast<unsigned char **>(value);
476 std::string kernelNamesList;
477 std::vector<size_t> vBinarySizes;
478
479 switch (name)
480 {
481 case cl::ProgramInfo::NumKernels:
482 for (const auto &deviceProgram : mAssociatedDevicePrograms)
483 {
484 valUInt += static_cast<decltype(valUInt)>(deviceProgram.second.numKernels());
485 }
486 copyValue = &valUInt;
487 copySize = sizeof(valUInt);
488 break;
489 case cl::ProgramInfo::BinarySizes:
490 {
491 for (const auto &deviceProgram : mAssociatedDevicePrograms)
492 {
493 vBinarySizes.push_back(
494 sizeof(ProgramBinaryOutputHeader) +
495 (deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
496 ? deviceProgram.second.binary.size() * sizeof(uint32_t)
497 : deviceProgram.second.IR.size()));
498 }
499 valPointer = vBinarySizes.data();
500 copyValue = valPointer;
501 copySize = vBinarySizes.size() * sizeof(size_t);
502 break;
503 }
504 case cl::ProgramInfo::Binaries:
505 for (const auto &deviceProgram : mAssociatedDevicePrograms)
506 {
507 const void *bin =
508 deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
509 ? reinterpret_cast<const void *>(deviceProgram.second.binary.data())
510 : reinterpret_cast<const void *>(deviceProgram.second.IR.data());
511 size_t binSize =
512 deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
513 ? deviceProgram.second.binary.size() * sizeof(uint32_t)
514 : deviceProgram.second.IR.size();
515 ProgramBinaryOutputHeader header{.headerVersion = kBinaryVersion,
516 .binaryType = deviceProgram.second.binaryType,
517 .buildStatus = deviceProgram.second.buildStatus};
518
519 if (outputBins != nullptr)
520 {
521 if (*outputBins != nullptr)
522 {
523 std::memcpy(*outputBins, &header, sizeof(ProgramBinaryOutputHeader));
524 std::memcpy((*outputBins) + sizeof(ProgramBinaryOutputHeader), bin,
525 binSize);
526 }
527 outputBins++;
528 }
529
530 // Spec just wants pointer size here
531 copySize += sizeof(unsigned char *);
532 }
533 // We already copied the (headers + binaries) over - nothing else left to copy
534 copyValue = nullptr;
535 break;
536 case cl::ProgramInfo::KernelNames:
537 for (const auto &deviceProgram : mAssociatedDevicePrograms)
538 {
539 kernelNamesList = deviceProgram.second.getKernelNames();
540 }
541 valPointer = kernelNamesList.data();
542 copyValue = valPointer;
543 copySize = kernelNamesList.size() + 1;
544 break;
545 default:
546 UNREACHABLE();
547 }
548
549 if ((value != nullptr) && (copyValue != nullptr))
550 {
551 std::memcpy(value, copyValue, copySize);
552 }
553
554 if (valueSizeRet != nullptr)
555 {
556 *valueSizeRet = copySize;
557 }
558
559 return angle::Result::Continue;
560 }
561
getBuildInfo(const cl::Device & device,cl::ProgramBuildInfo name,size_t valueSize,void * value,size_t * valueSizeRet) const562 angle::Result CLProgramVk::getBuildInfo(const cl::Device &device,
563 cl::ProgramBuildInfo name,
564 size_t valueSize,
565 void *value,
566 size_t *valueSizeRet) const
567 {
568 cl_uint valUInt = 0;
569 cl_build_status valStatus = 0;
570 const void *copyValue = nullptr;
571 size_t copySize = 0;
572 const DeviceProgramData *deviceProgramData = getDeviceProgramData(device.getNative());
573
574 switch (name)
575 {
576 case cl::ProgramBuildInfo::Status:
577 valStatus = deviceProgramData->buildStatus;
578 copyValue = &valStatus;
579 copySize = sizeof(valStatus);
580 break;
581 case cl::ProgramBuildInfo::Log:
582 copyValue = deviceProgramData->buildLog.c_str();
583 copySize = deviceProgramData->buildLog.size() + 1;
584 break;
585 case cl::ProgramBuildInfo::Options:
586 copyValue = mProgramOpts.c_str();
587 copySize = mProgramOpts.size() + 1;
588 break;
589 case cl::ProgramBuildInfo::BinaryType:
590 valUInt = deviceProgramData->binaryType;
591 copyValue = &valUInt;
592 copySize = sizeof(valUInt);
593 break;
594 case cl::ProgramBuildInfo::GlobalVariableTotalSize:
595 // Returns 0 if device does not support program scope global variables.
596 valUInt = 0;
597 copyValue = &valUInt;
598 copySize = sizeof(valUInt);
599 break;
600 default:
601 UNREACHABLE();
602 }
603
604 if ((value != nullptr) && (copyValue != nullptr))
605 {
606 memcpy(value, copyValue, std::min(valueSize, copySize));
607 }
608
609 if (valueSizeRet != nullptr)
610 {
611 *valueSizeRet = copySize;
612 }
613
614 return angle::Result::Continue;
615 }
616
createKernel(const cl::Kernel & kernel,const char * name,CLKernelImpl::Ptr * kernelOut)617 angle::Result CLProgramVk::createKernel(const cl::Kernel &kernel,
618 const char *name,
619 CLKernelImpl::Ptr *kernelOut)
620 {
621 std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex);
622
623 const auto devProgram = getDeviceProgramData(name);
624 ASSERT(devProgram != nullptr);
625
626 // Create kernel
627 CLKernelArguments kernelArgs = devProgram->getKernelArguments(name);
628 std::string kernelAttributes = devProgram->getKernelAttributes(name);
629 std::string kernelName = std::string(name ? name : "");
630 CLKernelVk::Ptr kernelImpl = CLKernelVk::Ptr(
631 new (std::nothrow) CLKernelVk(kernel, kernelName, kernelAttributes, kernelArgs));
632 if (kernelImpl == nullptr)
633 {
634 ERR() << "Could not create kernel obj!";
635 ANGLE_CL_RETURN_ERROR(CL_OUT_OF_HOST_MEMORY);
636 }
637
638 // Update push contant range and add layout bindings for arguments
639 vk::DescriptorSetLayoutDesc descriptorSetLayoutDesc;
640 VkPushConstantRange pcRange = devProgram->pushConstRange;
641 for (const auto &arg : kernelImpl->getArgs())
642 {
643 VkDescriptorType descType = VK_DESCRIPTOR_TYPE_MAX_ENUM;
644 switch (arg.type)
645 {
646 case NonSemanticClspvReflectionArgumentStorageBuffer:
647 case NonSemanticClspvReflectionArgumentPodStorageBuffer:
648 descType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
649 break;
650 case NonSemanticClspvReflectionArgumentUniform:
651 case NonSemanticClspvReflectionArgumentPodUniform:
652 case NonSemanticClspvReflectionArgumentPointerUniform:
653 descType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
654 break;
655 case NonSemanticClspvReflectionArgumentPodPushConstant:
656 // Get existing push constant range and see if we need to update
657 if (arg.pushConstOffset + arg.pushConstantSize > pcRange.offset + pcRange.size)
658 {
659 pcRange.size = arg.pushConstOffset + arg.pushConstantSize - pcRange.offset;
660 }
661 continue;
662 default:
663 continue;
664 }
665 descriptorSetLayoutDesc.update(arg.descriptorBinding, descType, 1,
666 VK_SHADER_STAGE_COMPUTE_BIT, nullptr);
667 }
668
669 // Get descriptor set layout from cache (creates if missed)
670 ANGLE_CL_IMPL_TRY_ERROR(
671 mDescSetLayoutCache.getDescriptorSetLayout(
672 mContext, descriptorSetLayoutDesc,
673 &kernelImpl->getDescriptorSetLayouts()[DescriptorSetIndex::ShaderResource]),
674 CL_INVALID_OPERATION);
675
676 // Get pipeline layout from cache (creates if missed)
677 vk::PipelineLayoutDesc pipelineLayoutDesc;
678 pipelineLayoutDesc.updateDescriptorSetLayout(DescriptorSetIndex::ShaderResource,
679 descriptorSetLayoutDesc);
680 pipelineLayoutDesc.updatePushConstantRange(pcRange.stageFlags, pcRange.offset, pcRange.size);
681 ANGLE_CL_IMPL_TRY_ERROR(mPipelineLayoutCache.getPipelineLayout(
682 mContext, pipelineLayoutDesc, kernelImpl->getDescriptorSetLayouts(),
683 &kernelImpl->getPipelineLayout()),
684 CL_INVALID_OPERATION);
685
686 // Setup descriptor pool
687 ANGLE_CL_IMPL_TRY_ERROR(mMetaDescriptorPool.bindCachedDescriptorPool(
688 mContext, descriptorSetLayoutDesc, 1, &mDescSetLayoutCache,
689 &mDescriptorPools[DescriptorSetIndex::ShaderResource]),
690 CL_INVALID_OPERATION);
691
692 *kernelOut = std::move(kernelImpl);
693
694 return angle::Result::Continue;
695 }
696
createKernels(cl_uint numKernels,CLKernelImpl::CreateFuncs & createFuncs,cl_uint * numKernelsRet)697 angle::Result CLProgramVk::createKernels(cl_uint numKernels,
698 CLKernelImpl::CreateFuncs &createFuncs,
699 cl_uint *numKernelsRet)
700 {
701 size_t numDevKernels = 0;
702 for (const auto &dev : mAssociatedDevicePrograms)
703 {
704 numDevKernels += dev.second.numKernels();
705 }
706 if (numKernelsRet != nullptr)
707 {
708 *numKernelsRet = static_cast<cl_uint>(numDevKernels);
709 }
710
711 if (numKernels != 0)
712 {
713 for (const auto &dev : mAssociatedDevicePrograms)
714 {
715 for (const auto &kernArgMap : dev.second.getKernelArgsMap())
716 {
717 createFuncs.emplace_back([this, &kernArgMap](const cl::Kernel &kern) {
718 CLKernelImpl::Ptr implPtr = nullptr;
719 ANGLE_CL_IMPL_TRY(this->createKernel(kern, kernArgMap.first.c_str(), &implPtr));
720 return CLKernelImpl::Ptr(std::move(implPtr));
721 });
722 }
723 }
724 }
725 return angle::Result::Continue;
726 }
727
getDeviceProgramData(const _cl_device_id * device) const728 const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData(
729 const _cl_device_id *device) const
730 {
731 if (!mAssociatedDevicePrograms.contains(device))
732 {
733 WARN() << "Device (" << device << ") is not associated with program (" << this << ") !";
734 return nullptr;
735 }
736 return &mAssociatedDevicePrograms.at(device);
737 }
738
getDeviceProgramData(const char * kernelName) const739 const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData(
740 const char *kernelName) const
741 {
742 for (const auto &deviceProgram : mAssociatedDevicePrograms)
743 {
744 if (deviceProgram.second.containsKernel(kernelName))
745 {
746 return &deviceProgram.second;
747 }
748 }
749 WARN() << "Kernel name (" << kernelName << ") is not associated with program (" << this
750 << ") !";
751 return nullptr;
752 }
753
buildInternal(const cl::DevicePtrs & devices,std::string options,std::string internalOptions,BuildType buildType,const LinkProgramsList & LinkProgramsList)754 bool CLProgramVk::buildInternal(const cl::DevicePtrs &devices,
755 std::string options,
756 std::string internalOptions,
757 BuildType buildType,
758 const LinkProgramsList &LinkProgramsList)
759 {
760 std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex);
761
762 // Cache original options string
763 mProgramOpts = options;
764
765 // Process options and append any other internal (required) options for clspv
766 std::vector<std::string> optionTokens;
767 angle::SplitStringAlongWhitespace(options + " " + internalOptions, &optionTokens);
768 const bool createLibrary = std::find(optionTokens.begin(), optionTokens.end(),
769 "-create-library") != optionTokens.end();
770 std::string processedOptions = ProcessBuildOptions(optionTokens, buildType);
771
772 // Build for each associated device
773 for (size_t i = 0; i < devices.size(); ++i)
774 {
775 const cl::RefPointer<cl::Device> &device = devices.at(i);
776 DeviceProgramData &deviceProgramData = mAssociatedDevicePrograms[device->getNative()];
777 deviceProgramData.buildStatus = CL_BUILD_IN_PROGRESS;
778
779 if (buildType != BuildType::BINARY)
780 {
781 // Invoke clspv
782 switch (buildType)
783 {
784 case BuildType::BUILD:
785 case BuildType::COMPILE:
786 {
787 ScopedClspvContext clspvCtx;
788 const char *clSrc = mProgram.getSource().c_str();
789 ClspvError clspvRet = clspvCompileFromSourcesString(
790 1, NULL, static_cast<const char **>(&clSrc), processedOptions.c_str(),
791 &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog);
792 deviceProgramData.buildLog =
793 clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : "";
794 if (clspvRet != CLSPV_SUCCESS)
795 {
796 ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!";
797 deviceProgramData.buildStatus = CL_BUILD_ERROR;
798 return false;
799 }
800
801 if (buildType == BuildType::COMPILE)
802 {
803 deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0);
804 std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin,
805 clspvCtx.mOutputBinSize);
806 deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
807 }
808 else
809 {
810 deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t),
811 0);
812 std::memcpy(deviceProgramData.binary.data(), clspvCtx.mOutputBin,
813 clspvCtx.mOutputBinSize);
814 deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
815 }
816 break;
817 }
818 case BuildType::LINK:
819 {
820 ScopedClspvContext clspvCtx;
821 std::vector<size_t> vSizes;
822 std::vector<const char *> vBins;
823 const LinkPrograms &linkPrograms = LinkProgramsList.at(i);
824 for (const CLProgramVk::DeviceProgramData *linkProgramData : linkPrograms)
825 {
826 vSizes.push_back(linkProgramData->IR.size());
827 vBins.push_back(linkProgramData->IR.data());
828 }
829 ClspvError clspvRet = clspvCompileFromSourcesString(
830 linkPrograms.size(), vSizes.data(), vBins.data(), processedOptions.c_str(),
831 &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog);
832 deviceProgramData.buildLog =
833 clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : "";
834 if (clspvRet != CLSPV_SUCCESS)
835 {
836 ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!";
837 deviceProgramData.buildStatus = CL_BUILD_ERROR;
838 return false;
839 }
840
841 if (createLibrary)
842 {
843 deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0);
844 std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin,
845 clspvCtx.mOutputBinSize);
846 deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
847 }
848 else
849 {
850 deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t),
851 0);
852 std::memcpy(deviceProgramData.binary.data(),
853 reinterpret_cast<char *>(clspvCtx.mOutputBin),
854 clspvCtx.mOutputBinSize);
855 deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
856 }
857 break;
858 }
859 default:
860 UNREACHABLE();
861 return false;
862 }
863 }
864
865 // Extract reflection info from spv binary and populate reflection data, as well as create
866 // the shader module
867 if (deviceProgramData.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE)
868 {
869 spvtools::SpirvTools spvTool(SPV_ENV_UNIVERSAL_1_5);
870 bool parseRet = spvTool.Parse(
871 deviceProgramData.binary,
872 [](const spv_endianness_t endianess, const spv_parsed_header_t &instruction) {
873 return SPV_SUCCESS;
874 },
875 [&deviceProgramData](const spv_parsed_instruction_t &instruction) {
876 return ParseReflection(deviceProgramData.reflectionData, instruction);
877 });
878 if (!parseRet)
879 {
880 ERR() << "Failed to parse reflection info from SPIR-V!";
881 deviceProgramData.buildStatus = CL_BUILD_ERROR;
882 return false;
883 }
884
885 if (mShader.get().valid())
886 {
887 // User is recompiling program, we need to recreate the shader module
888 mShader.get().destroy(mContext->getDevice());
889 }
890 // Strip SPIR-V binary if Vk implementation does not support non-semantic info
891 angle::spirv::Blob spvBlob =
892 !mContext->getRenderer()->getFeatures().supportsShaderNonSemanticInfo.enabled
893 ? stripReflection(&deviceProgramData)
894 : deviceProgramData.binary;
895 ASSERT(!spvBlob.empty());
896 if (IsError(vk::InitShaderModule(mContext, &mShader.get(), spvBlob.data(),
897 spvBlob.size() * sizeof(uint32_t))))
898 {
899 ERR() << "Failed to init Vulkan Shader Module!";
900 deviceProgramData.buildStatus = CL_BUILD_ERROR;
901 return false;
902 }
903
904 // Setup inital push constant range
905 uint32_t pushConstantMinOffet = UINT32_MAX, pushConstantMaxOffset = 0,
906 pushConstantMaxSize = 0;
907 for (const auto &pushConstant : deviceProgramData.reflectionData.pushConstants)
908 {
909 pushConstantMinOffet = pushConstant.second.offset < pushConstantMinOffet
910 ? pushConstant.second.offset
911 : pushConstantMinOffet;
912 if (pushConstant.second.offset >= pushConstantMaxOffset)
913 {
914 pushConstantMaxOffset = pushConstant.second.offset;
915 pushConstantMaxSize = pushConstant.second.size;
916 }
917 }
918 deviceProgramData.pushConstRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
919 deviceProgramData.pushConstRange.offset =
920 pushConstantMinOffet == UINT32_MAX ? 0 : pushConstantMinOffet;
921 deviceProgramData.pushConstRange.size = pushConstantMaxOffset + pushConstantMaxSize;
922
923 if (kAngleDebug)
924 {
925 if (mContext->getFeatures().clDumpVkSpirv.enabled)
926 {
927 angle::spirv::Print(deviceProgramData.binary);
928 }
929 }
930 }
931 deviceProgramData.buildStatus = CL_BUILD_SUCCESS;
932 }
933 return true;
934 }
935
stripReflection(const DeviceProgramData * deviceProgramData)936 angle::spirv::Blob CLProgramVk::stripReflection(const DeviceProgramData *deviceProgramData)
937 {
938 angle::spirv::Blob binaryStripped;
939 spvtools::Optimizer opt(SPV_ENV_UNIVERSAL_1_5);
940 opt.RegisterPass(spvtools::CreateStripReflectInfoPass());
941 spvtools::OptimizerOptions optOptions;
942 optOptions.set_run_validator(false);
943 if (!opt.Run(deviceProgramData->binary.data(), deviceProgramData->binary.size(),
944 &binaryStripped, optOptions))
945 {
946 ERR() << "Could not strip reflection data from binary!";
947 }
948 return binaryStripped;
949 }
950
allocateDescriptorSet(const vk::DescriptorSetLayout & descriptorSetLayout,VkDescriptorSet * descriptorSetOut)951 angle::Result CLProgramVk::allocateDescriptorSet(const vk::DescriptorSetLayout &descriptorSetLayout,
952 VkDescriptorSet *descriptorSetOut)
953 {
954 if (mDescriptorPools[DescriptorSetIndex::ShaderResource].get().valid())
955 {
956 ANGLE_CL_IMPL_TRY_ERROR(
957 mDescriptorPools[DescriptorSetIndex::ShaderResource].get().allocateDescriptorSet(
958 mContext, descriptorSetLayout, &mPoolBinding, descriptorSetOut),
959 CL_INVALID_OPERATION);
960 }
961 return angle::Result::Continue;
962 }
963
964 } // namespace rx
965