1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // CLDeviceVk.cpp: Implements the class methods for CLDeviceVk.
7
8 #include "libANGLE/renderer/vulkan/CLDeviceVk.h"
9 #include "libANGLE/renderer/vulkan/clspv_utils.h"
10 #include "libANGLE/renderer/vulkan/vk_renderer.h"
11
12 #include "libANGLE/renderer/cl_types.h"
13
14 #include "libANGLE/cl_utils.h"
15
16 namespace rx
17 {
18
CLDeviceVk(const cl::Device & device,vk::Renderer * renderer)19 CLDeviceVk::CLDeviceVk(const cl::Device &device, vk::Renderer *renderer)
20 : CLDeviceImpl(device), mRenderer(renderer), mSpirvVersion(ClspvGetSpirvVersion(renderer))
21 {
22 const VkPhysicalDeviceProperties &props = mRenderer->getPhysicalDeviceProperties();
23
24 // Setup initial device mInfo fields
25 // TODO(aannestrand) Create cl::Caps and use for device creation
26 // http://anglebug.com/42266954
27 mInfoString = {
28 {cl::DeviceInfo::Name, std::string(props.deviceName)},
29 {cl::DeviceInfo::Vendor, mRenderer->getVendorString()},
30 {cl::DeviceInfo::DriverVersion, mRenderer->getVersionString(true)},
31 {cl::DeviceInfo::Version, std::string("OpenCL 3.0 " + mRenderer->getVersionString(true))},
32 {cl::DeviceInfo::Profile, std::string("FULL_PROFILE")},
33 {cl::DeviceInfo::OpenCL_C_Version, std::string("OpenCL C 1.2 ")},
34 {cl::DeviceInfo::LatestConformanceVersionPassed, std::string("FIXME")}};
35 mInfoSizeT = {
36 {cl::DeviceInfo::MaxWorkGroupSize, props.limits.maxComputeWorkGroupInvocations},
37 {cl::DeviceInfo::MaxGlobalVariableSize, 0},
38 {cl::DeviceInfo::GlobalVariablePreferredTotalSize, 0},
39
40 // TODO(aannestrand) Update these hardcoded platform/device queries
41 // http://anglebug.com/42266935
42 {cl::DeviceInfo::MaxParameterSize, 1024},
43 {cl::DeviceInfo::ProfilingTimerResolution, 1},
44 {cl::DeviceInfo::PrintfBufferSize, 1024 * 1024},
45 {cl::DeviceInfo::PreferredWorkGroupSizeMultiple, 16},
46 };
47 mInfoULong = {
48 {cl::DeviceInfo::LocalMemSize, props.limits.maxComputeSharedMemorySize},
49 {cl::DeviceInfo::SVM_Capabilities, 0},
50 {cl::DeviceInfo::QueueOnDeviceProperties, 0},
51 {cl::DeviceInfo::PartitionAffinityDomain, 0},
52 {cl::DeviceInfo::DeviceEnqueueCapabilities, 0},
53 {cl::DeviceInfo::QueueOnHostProperties, CL_QUEUE_PROFILING_ENABLE},
54
55 // TODO(aannestrand) Update these hardcoded platform/device queries
56 // http://anglebug.com/42266935
57 {cl::DeviceInfo::HalfFpConfig, 0},
58 {cl::DeviceInfo::DoubleFpConfig, 0},
59 {cl::DeviceInfo::GlobalMemCacheSize, 0},
60 {cl::DeviceInfo::GlobalMemSize, 1024 * 1024 * 1024},
61 {cl::DeviceInfo::MaxConstantBufferSize, 64 * 1024},
62 {cl::DeviceInfo::SingleFpConfig, CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN | CL_FP_FMA},
63 {cl::DeviceInfo::AtomicMemoryCapabilities,
64 CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP},
65 // TODO (http://anglebug.com/379669750) Add these based on the Vulkan features query
66 {cl::DeviceInfo::AtomicFenceCapabilities, CL_DEVICE_ATOMIC_ORDER_RELAXED |
67 CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
68 CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP |
69 // non-mandatory
70 CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM},
71 };
72 mInfoUInt = {
73 {cl::DeviceInfo::VendorID, props.vendorID},
74 {cl::DeviceInfo::MaxReadImageArgs, cl::IMPLEMENATION_MAX_READ_IMAGES},
75 {cl::DeviceInfo::MaxWriteImageArgs, cl::IMPLEMENATION_MAX_WRITE_IMAGES},
76 {cl::DeviceInfo::MaxReadWriteImageArgs, cl::IMPLEMENATION_MAX_WRITE_IMAGES},
77 {cl::DeviceInfo::GlobalMemCachelineSize,
78 static_cast<cl_uint>(props.limits.nonCoherentAtomSize)},
79 {cl::DeviceInfo::Available, CL_TRUE},
80 {cl::DeviceInfo::LinkerAvailable, CL_TRUE},
81 {cl::DeviceInfo::CompilerAvailable, CL_TRUE},
82 {cl::DeviceInfo::MaxOnDeviceQueues, 0},
83 {cl::DeviceInfo::MaxOnDeviceEvents, 0},
84 {cl::DeviceInfo::QueueOnDeviceMaxSize, 0},
85 {cl::DeviceInfo::QueueOnDevicePreferredSize, 0},
86 {cl::DeviceInfo::MaxPipeArgs, 0},
87 {cl::DeviceInfo::PipeMaxPacketSize, 0},
88 {cl::DeviceInfo::PipeSupport, CL_FALSE},
89 {cl::DeviceInfo::PipeMaxActiveReservations, 0},
90 {cl::DeviceInfo::ErrorCorrectionSupport, CL_FALSE},
91 {cl::DeviceInfo::PreferredInteropUserSync, CL_TRUE},
92 {cl::DeviceInfo::ExecutionCapabilities, CL_EXEC_KERNEL},
93
94 // TODO(aannestrand) Update these hardcoded platform/device queries
95 // http://anglebug.com/42266935
96 {cl::DeviceInfo::AddressBits, 32},
97 {cl::DeviceInfo::EndianLittle, CL_TRUE},
98 {cl::DeviceInfo::LocalMemType, CL_LOCAL},
99 // TODO (http://anglebug.com/379669750) Vulkan reports a big sampler count number, we dont
100 // need that many and set it to minimum req for now.
101 {cl::DeviceInfo::MaxSamplers, 16u},
102 {cl::DeviceInfo::MaxConstantArgs, 8},
103 {cl::DeviceInfo::MaxNumSubGroups, 0},
104 {cl::DeviceInfo::MaxComputeUnits, 4},
105 {cl::DeviceInfo::MaxClockFrequency, 555},
106 {cl::DeviceInfo::MaxWorkItemDimensions, 3},
107 {cl::DeviceInfo::MinDataTypeAlignSize, 128},
108 {cl::DeviceInfo::GlobalMemCacheType, CL_NONE},
109 {cl::DeviceInfo::HostUnifiedMemory, CL_TRUE},
110 {cl::DeviceInfo::NativeVectorWidthChar, 4},
111 {cl::DeviceInfo::NativeVectorWidthShort, 2},
112 {cl::DeviceInfo::NativeVectorWidthInt, 1},
113 {cl::DeviceInfo::NativeVectorWidthLong, 1},
114 {cl::DeviceInfo::NativeVectorWidthFloat, 1},
115 {cl::DeviceInfo::NativeVectorWidthDouble, 1},
116 {cl::DeviceInfo::NativeVectorWidthHalf, 0},
117 {cl::DeviceInfo::PartitionMaxSubDevices, 0},
118 {cl::DeviceInfo::PreferredVectorWidthInt, 1},
119 {cl::DeviceInfo::PreferredVectorWidthLong, 1},
120 {cl::DeviceInfo::PreferredVectorWidthChar, 4},
121 {cl::DeviceInfo::PreferredVectorWidthHalf, 0},
122 {cl::DeviceInfo::PreferredVectorWidthShort, 2},
123 {cl::DeviceInfo::PreferredVectorWidthFloat, 1},
124 {cl::DeviceInfo::PreferredVectorWidthDouble, 0},
125 {cl::DeviceInfo::PreferredLocalAtomicAlignment, 0},
126 {cl::DeviceInfo::PreferredGlobalAtomicAlignment, 0},
127 {cl::DeviceInfo::PreferredPlatformAtomicAlignment, 0},
128 {cl::DeviceInfo::NonUniformWorkGroupSupport, CL_TRUE},
129 {cl::DeviceInfo::GenericAddressSpaceSupport, CL_FALSE},
130 {cl::DeviceInfo::SubGroupIndependentForwardProgress, CL_FALSE},
131 {cl::DeviceInfo::WorkGroupCollectiveFunctionsSupport, CL_FALSE},
132 };
133 }
134
135 CLDeviceVk::~CLDeviceVk() = default;
136
createInfo(cl::DeviceType type) const137 CLDeviceImpl::Info CLDeviceVk::createInfo(cl::DeviceType type) const
138 {
139 Info info(type);
140
141 const VkPhysicalDeviceProperties &properties = mRenderer->getPhysicalDeviceProperties();
142
143 info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[0]);
144 info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[1]);
145 info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[2]);
146
147 // TODO(aannestrand) Update these hardcoded platform/device queries
148 // http://anglebug.com/42266935
149 info.maxMemAllocSize = 1 << 30;
150 info.memBaseAddrAlign = 1024;
151
152 info.imageSupport = CL_TRUE;
153
154 info.image2D_MaxWidth = properties.limits.maxImageDimension2D;
155 info.image2D_MaxHeight = properties.limits.maxImageDimension2D;
156 info.image3D_MaxWidth = properties.limits.maxImageDimension3D;
157 info.image3D_MaxHeight = properties.limits.maxImageDimension3D;
158 info.image3D_MaxDepth = properties.limits.maxImageDimension3D;
159 // TODO (http://anglebug.com/379669750) For now set it minimum requirement.
160 info.imageMaxBufferSize = 65536;
161 info.imageMaxArraySize = properties.limits.maxImageArrayLayers;
162 info.imagePitchAlignment = 0u;
163 info.imageBaseAddressAlignment = 0u;
164
165 info.execCapabilities = CL_EXEC_KERNEL;
166 info.queueOnDeviceMaxSize = 0u;
167 info.builtInKernels = "";
168 info.version = CL_MAKE_VERSION(3, 0, 0);
169 info.versionStr = "OpenCL 3.0 " + mRenderer->getVersionString(true);
170 info.OpenCL_C_AllVersions = {{CL_MAKE_VERSION(1, 0, 0), "OpenCL C"},
171 {CL_MAKE_VERSION(1, 1, 0), "OpenCL C"},
172 {CL_MAKE_VERSION(1, 2, 0), "OpenCL C"},
173 {CL_MAKE_VERSION(3, 0, 0), "OpenCL C"}};
174
175 info.OpenCL_C_Features = {};
176 info.ILsWithVersion = {};
177 info.builtInKernelsWithVersion = {};
178 info.partitionProperties = {};
179 info.partitionType = {};
180 info.IL_Version = "";
181
182 // Below extensions are required as of OpenCL 1.1, add their versioned strings
183 NameVersionVector versionedExtensionList = {
184 // Below extensions are required as of OpenCL 1.1
185 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
186 .name = "cl_khr_byte_addressable_store"},
187 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
188 .name = "cl_khr_global_int32_base_atomics"},
189 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
190 .name = "cl_khr_global_int32_extended_atomics"},
191 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
192 .name = "cl_khr_local_int32_base_atomics"},
193 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
194 .name = "cl_khr_local_int32_extended_atomics"},
195 };
196 if (info.imageSupport && info.image3D_MaxDepth > 1)
197 {
198 versionedExtensionList.push_back(
199 cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0), .name = "cl_khr_3d_image_writes"});
200 }
201 info.initializeVersionedExtensions(std::move(versionedExtensionList));
202
203 if (!mRenderer->getFeatures().supportsUniformBufferStandardLayout.enabled)
204 {
205 ERR() << "VK_KHR_uniform_buffer_standard_layout extension support is needed to properly "
206 "support uniform buffers. Otherwise, you must disable OpenCL.";
207 }
208
209 // Populate supported features
210 if (info.imageSupport)
211 {
212 info.OpenCL_C_Features.push_back(
213 cl_name_version{.version = CL_MAKE_VERSION(3, 0, 0), .name = "__opencl_c_images"});
214 info.OpenCL_C_Features.push_back(cl_name_version{.version = CL_MAKE_VERSION(3, 0, 0),
215 .name = "__opencl_c_3d_image_writes"});
216 info.OpenCL_C_Features.push_back(cl_name_version{.version = CL_MAKE_VERSION(3, 0, 0),
217 .name = "__opencl_c_read_write_images"});
218 }
219 if (mRenderer->getEnabledFeatures().features.shaderInt64)
220 {
221 info.OpenCL_C_Features.push_back(
222 cl_name_version{.version = CL_MAKE_VERSION(3, 0, 0), .name = "__opencl_c_int64"});
223 }
224
225 return info;
226 }
227
getInfoUInt(cl::DeviceInfo name,cl_uint * value) const228 angle::Result CLDeviceVk::getInfoUInt(cl::DeviceInfo name, cl_uint *value) const
229 {
230 if (mInfoUInt.count(name))
231 {
232 *value = mInfoUInt.at(name);
233 return angle::Result::Continue;
234 }
235 ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
236 }
237
getInfoULong(cl::DeviceInfo name,cl_ulong * value) const238 angle::Result CLDeviceVk::getInfoULong(cl::DeviceInfo name, cl_ulong *value) const
239 {
240 if (mInfoULong.count(name))
241 {
242 *value = mInfoULong.at(name);
243 return angle::Result::Continue;
244 }
245 ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
246 }
247
getInfoSizeT(cl::DeviceInfo name,size_t * value) const248 angle::Result CLDeviceVk::getInfoSizeT(cl::DeviceInfo name, size_t *value) const
249 {
250 if (mInfoSizeT.count(name))
251 {
252 *value = mInfoSizeT.at(name);
253 return angle::Result::Continue;
254 }
255 ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
256 }
257
getInfoStringLength(cl::DeviceInfo name,size_t * value) const258 angle::Result CLDeviceVk::getInfoStringLength(cl::DeviceInfo name, size_t *value) const
259 {
260 if (mInfoString.count(name))
261 {
262 *value = mInfoString.at(name).length() + 1;
263 return angle::Result::Continue;
264 }
265 ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
266 }
267
getInfoString(cl::DeviceInfo name,size_t size,char * value) const268 angle::Result CLDeviceVk::getInfoString(cl::DeviceInfo name, size_t size, char *value) const
269 {
270 if (mInfoString.count(name))
271 {
272 std::strcpy(value, mInfoString.at(name).c_str());
273 return angle::Result::Continue;
274 }
275 ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
276 }
277
createSubDevices(const cl_device_partition_property * properties,cl_uint numDevices,CreateFuncs & subDevices,cl_uint * numDevicesRet)278 angle::Result CLDeviceVk::createSubDevices(const cl_device_partition_property *properties,
279 cl_uint numDevices,
280 CreateFuncs &subDevices,
281 cl_uint *numDevicesRet)
282 {
283 UNIMPLEMENTED();
284 ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
285 }
286
// Chooses a local work-group size for |ndrange|.
//
// Starting from {1, 1, 1}, the size of each of the first |ndrange.workDimensions| dimensions is
// doubled in turn for as long as
//   - that dimension stays within the Vulkan per-dimension local size limit
//     (maxComputeWorkGroupSize), and
//   - the total number of work-items stays within min(MaxWorkGroupSize, 64).
// Dimensions beyond |ndrange.workDimensions| remain 1. The global work size is not consulted.
cl::WorkgroupSize CLDeviceVk::selectWorkGroupSize(const cl::NDRange &ndrange) const
{
    // Limit total work-group size to the Vulkan device's limit, capped at 64 work-items.
    const VkPhysicalDeviceProperties &props = mRenderer->getPhysicalDeviceProperties();
    uint32_t maxSize = static_cast<uint32_t>(mInfoSizeT.at(cl::DeviceInfo::MaxWorkGroupSize));
    maxSize          = std::min(maxSize, 64u);

    bool keepIncreasing         = false;
    cl::WorkgroupSize localSize = {1, 1, 1};
    do
    {
        keepIncreasing = false;
        for (cl_uint i = 0; i < ndrange.workDimensions; i++)
        {
            cl::WorkgroupSize newLocalSize = localSize;
            newLocalSize[i] *= 2;

            // The per-dimension bound on a local work-group is maxComputeWorkGroupSize;
            // maxComputeWorkGroupCount limits the number of dispatched groups instead.
            if (newLocalSize[i] <= props.limits.maxComputeWorkGroupSize[i] &&
                newLocalSize[0] * newLocalSize[1] * newLocalSize[2] <= maxSize)
            {
                localSize      = newLocalSize;
                keepIncreasing = true;
            }
        }
    } while (keepIncreasing);

    return localSize;
}
315
316 } // namespace rx
317