• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // CLDeviceVk.cpp: Implements the class methods for CLDeviceVk.
7 
8 #include "libANGLE/renderer/vulkan/CLDeviceVk.h"
9 #include "libANGLE/renderer/vulkan/clspv_utils.h"
10 #include "libANGLE/renderer/vulkan/vk_renderer.h"
11 
12 #include "libANGLE/renderer/cl_types.h"
13 
14 #include "libANGLE/cl_utils.h"
15 
16 namespace rx
17 {
18 
CLDeviceVk(const cl::Device & device,vk::Renderer * renderer)19 CLDeviceVk::CLDeviceVk(const cl::Device &device, vk::Renderer *renderer)
20     : CLDeviceImpl(device), mRenderer(renderer), mSpirvVersion(ClspvGetSpirvVersion(renderer))
21 {
22     const VkPhysicalDeviceProperties &props = mRenderer->getPhysicalDeviceProperties();
23 
24     // Setup initial device mInfo fields
25     // TODO(aannestrand) Create cl::Caps and use for device creation
26     // http://anglebug.com/42266954
27     mInfoString = {
28         {cl::DeviceInfo::Name, std::string(props.deviceName)},
29         {cl::DeviceInfo::Vendor, mRenderer->getVendorString()},
30         {cl::DeviceInfo::DriverVersion, mRenderer->getVersionString(true)},
31         {cl::DeviceInfo::Version, std::string("OpenCL 3.0 " + mRenderer->getVersionString(true))},
32         {cl::DeviceInfo::Profile, std::string("FULL_PROFILE")},
33         {cl::DeviceInfo::OpenCL_C_Version, std::string("OpenCL C 1.2 ")},
34         {cl::DeviceInfo::LatestConformanceVersionPassed, std::string("FIXME")}};
35     mInfoSizeT = {
36         {cl::DeviceInfo::MaxWorkGroupSize, props.limits.maxComputeWorkGroupInvocations},
37         {cl::DeviceInfo::MaxGlobalVariableSize, 0},
38         {cl::DeviceInfo::GlobalVariablePreferredTotalSize, 0},
39 
40         // TODO(aannestrand) Update these hardcoded platform/device queries
41         // http://anglebug.com/42266935
42         {cl::DeviceInfo::MaxParameterSize, 1024},
43         {cl::DeviceInfo::ProfilingTimerResolution, 1},
44         {cl::DeviceInfo::PrintfBufferSize, 1024 * 1024},
45         {cl::DeviceInfo::PreferredWorkGroupSizeMultiple, 16},
46     };
47     mInfoULong = {
48         {cl::DeviceInfo::LocalMemSize, props.limits.maxComputeSharedMemorySize},
49         {cl::DeviceInfo::SVM_Capabilities, 0},
50         {cl::DeviceInfo::QueueOnDeviceProperties, 0},
51         {cl::DeviceInfo::PartitionAffinityDomain, 0},
52         {cl::DeviceInfo::DeviceEnqueueCapabilities, 0},
53         {cl::DeviceInfo::QueueOnHostProperties, CL_QUEUE_PROFILING_ENABLE},
54 
55         // TODO(aannestrand) Update these hardcoded platform/device queries
56         // http://anglebug.com/42266935
57         {cl::DeviceInfo::HalfFpConfig, 0},
58         {cl::DeviceInfo::DoubleFpConfig, 0},
59         {cl::DeviceInfo::GlobalMemCacheSize, 0},
60         {cl::DeviceInfo::GlobalMemSize, 1024 * 1024 * 1024},
61         {cl::DeviceInfo::MaxConstantBufferSize, 64 * 1024},
62         {cl::DeviceInfo::SingleFpConfig, CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN | CL_FP_FMA},
63         {cl::DeviceInfo::AtomicMemoryCapabilities,
64          CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP},
65         // TODO (http://anglebug.com/379669750) Add these based on the Vulkan features query
66         {cl::DeviceInfo::AtomicFenceCapabilities, CL_DEVICE_ATOMIC_ORDER_RELAXED |
67                                                       CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
68                                                       CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP |
69                                                       // non-mandatory
70                                                       CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM},
71     };
72     mInfoUInt = {
73         {cl::DeviceInfo::VendorID, props.vendorID},
74         {cl::DeviceInfo::MaxReadImageArgs, cl::IMPLEMENATION_MAX_READ_IMAGES},
75         {cl::DeviceInfo::MaxWriteImageArgs, cl::IMPLEMENATION_MAX_WRITE_IMAGES},
76         {cl::DeviceInfo::MaxReadWriteImageArgs, cl::IMPLEMENATION_MAX_WRITE_IMAGES},
77         {cl::DeviceInfo::GlobalMemCachelineSize,
78          static_cast<cl_uint>(props.limits.nonCoherentAtomSize)},
79         {cl::DeviceInfo::Available, CL_TRUE},
80         {cl::DeviceInfo::LinkerAvailable, CL_TRUE},
81         {cl::DeviceInfo::CompilerAvailable, CL_TRUE},
82         {cl::DeviceInfo::MaxOnDeviceQueues, 0},
83         {cl::DeviceInfo::MaxOnDeviceEvents, 0},
84         {cl::DeviceInfo::QueueOnDeviceMaxSize, 0},
85         {cl::DeviceInfo::QueueOnDevicePreferredSize, 0},
86         {cl::DeviceInfo::MaxPipeArgs, 0},
87         {cl::DeviceInfo::PipeMaxPacketSize, 0},
88         {cl::DeviceInfo::PipeSupport, CL_FALSE},
89         {cl::DeviceInfo::PipeMaxActiveReservations, 0},
90         {cl::DeviceInfo::ErrorCorrectionSupport, CL_FALSE},
91         {cl::DeviceInfo::PreferredInteropUserSync, CL_TRUE},
92         {cl::DeviceInfo::ExecutionCapabilities, CL_EXEC_KERNEL},
93 
94         // TODO(aannestrand) Update these hardcoded platform/device queries
95         // http://anglebug.com/42266935
96         {cl::DeviceInfo::AddressBits, 32},
97         {cl::DeviceInfo::EndianLittle, CL_TRUE},
98         {cl::DeviceInfo::LocalMemType, CL_LOCAL},
99         // TODO (http://anglebug.com/379669750) Vulkan reports a big sampler count number, we dont
100         // need that many and set it to minimum req for now.
101         {cl::DeviceInfo::MaxSamplers, 16u},
102         {cl::DeviceInfo::MaxConstantArgs, 8},
103         {cl::DeviceInfo::MaxNumSubGroups, 0},
104         {cl::DeviceInfo::MaxComputeUnits, 4},
105         {cl::DeviceInfo::MaxClockFrequency, 555},
106         {cl::DeviceInfo::MaxWorkItemDimensions, 3},
107         {cl::DeviceInfo::MinDataTypeAlignSize, 128},
108         {cl::DeviceInfo::GlobalMemCacheType, CL_NONE},
109         {cl::DeviceInfo::HostUnifiedMemory, CL_TRUE},
110         {cl::DeviceInfo::NativeVectorWidthChar, 4},
111         {cl::DeviceInfo::NativeVectorWidthShort, 2},
112         {cl::DeviceInfo::NativeVectorWidthInt, 1},
113         {cl::DeviceInfo::NativeVectorWidthLong, 1},
114         {cl::DeviceInfo::NativeVectorWidthFloat, 1},
115         {cl::DeviceInfo::NativeVectorWidthDouble, 1},
116         {cl::DeviceInfo::NativeVectorWidthHalf, 0},
117         {cl::DeviceInfo::PartitionMaxSubDevices, 0},
118         {cl::DeviceInfo::PreferredVectorWidthInt, 1},
119         {cl::DeviceInfo::PreferredVectorWidthLong, 1},
120         {cl::DeviceInfo::PreferredVectorWidthChar, 4},
121         {cl::DeviceInfo::PreferredVectorWidthHalf, 0},
122         {cl::DeviceInfo::PreferredVectorWidthShort, 2},
123         {cl::DeviceInfo::PreferredVectorWidthFloat, 1},
124         {cl::DeviceInfo::PreferredVectorWidthDouble, 0},
125         {cl::DeviceInfo::PreferredLocalAtomicAlignment, 0},
126         {cl::DeviceInfo::PreferredGlobalAtomicAlignment, 0},
127         {cl::DeviceInfo::PreferredPlatformAtomicAlignment, 0},
128         {cl::DeviceInfo::NonUniformWorkGroupSupport, CL_TRUE},
129         {cl::DeviceInfo::GenericAddressSpaceSupport, CL_FALSE},
130         {cl::DeviceInfo::SubGroupIndependentForwardProgress, CL_FALSE},
131         {cl::DeviceInfo::WorkGroupCollectiveFunctionsSupport, CL_FALSE},
132     };
133 }
134 
135 CLDeviceVk::~CLDeviceVk() = default;
136 
createInfo(cl::DeviceType type) const137 CLDeviceImpl::Info CLDeviceVk::createInfo(cl::DeviceType type) const
138 {
139     Info info(type);
140 
141     const VkPhysicalDeviceProperties &properties = mRenderer->getPhysicalDeviceProperties();
142 
143     info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[0]);
144     info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[1]);
145     info.maxWorkItemSizes.push_back(properties.limits.maxComputeWorkGroupSize[2]);
146 
147     // TODO(aannestrand) Update these hardcoded platform/device queries
148     // http://anglebug.com/42266935
149     info.maxMemAllocSize  = 1 << 30;
150     info.memBaseAddrAlign = 1024;
151 
152     info.imageSupport = CL_TRUE;
153 
154     info.image2D_MaxWidth  = properties.limits.maxImageDimension2D;
155     info.image2D_MaxHeight = properties.limits.maxImageDimension2D;
156     info.image3D_MaxWidth  = properties.limits.maxImageDimension3D;
157     info.image3D_MaxHeight = properties.limits.maxImageDimension3D;
158     info.image3D_MaxDepth  = properties.limits.maxImageDimension3D;
159     // TODO (http://anglebug.com/379669750) For now set it minimum requirement.
160     info.imageMaxBufferSize        = 65536;
161     info.imageMaxArraySize         = properties.limits.maxImageArrayLayers;
162     info.imagePitchAlignment       = 0u;
163     info.imageBaseAddressAlignment = 0u;
164 
165     info.execCapabilities     = CL_EXEC_KERNEL;
166     info.queueOnDeviceMaxSize = 0u;
167     info.builtInKernels       = "";
168     info.version              = CL_MAKE_VERSION(3, 0, 0);
169     info.versionStr           = "OpenCL 3.0 " + mRenderer->getVersionString(true);
170     info.OpenCL_C_AllVersions = {{CL_MAKE_VERSION(1, 0, 0), "OpenCL C"},
171                                  {CL_MAKE_VERSION(1, 1, 0), "OpenCL C"},
172                                  {CL_MAKE_VERSION(1, 2, 0), "OpenCL C"},
173                                  {CL_MAKE_VERSION(3, 0, 0), "OpenCL C"}};
174 
175     info.OpenCL_C_Features         = {};
176     info.ILsWithVersion            = {};
177     info.builtInKernelsWithVersion = {};
178     info.partitionProperties       = {};
179     info.partitionType             = {};
180     info.IL_Version                = "";
181 
182     // Below extensions are required as of OpenCL 1.1, add their versioned strings
183     NameVersionVector versionedExtensionList = {
184         // Below extensions are required as of OpenCL 1.1
185         cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
186                         .name    = "cl_khr_byte_addressable_store"},
187         cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
188                         .name    = "cl_khr_global_int32_base_atomics"},
189         cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
190                         .name    = "cl_khr_global_int32_extended_atomics"},
191         cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
192                         .name    = "cl_khr_local_int32_base_atomics"},
193         cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0),
194                         .name    = "cl_khr_local_int32_extended_atomics"},
195     };
196     if (info.imageSupport && info.image3D_MaxDepth > 1)
197     {
198         versionedExtensionList.push_back(
199             cl_name_version{.version = CL_MAKE_VERSION(1, 0, 0), .name = "cl_khr_3d_image_writes"});
200     }
201     info.initializeVersionedExtensions(std::move(versionedExtensionList));
202 
203     if (!mRenderer->getFeatures().supportsUniformBufferStandardLayout.enabled)
204     {
205         ERR() << "VK_KHR_uniform_buffer_standard_layout extension support is needed to properly "
206                  "support uniform buffers. Otherwise, you must disable OpenCL.";
207     }
208 
209     // Populate supported features
210     if (info.imageSupport)
211     {
212         info.OpenCL_C_Features.push_back(
213             cl_name_version{.version = CL_MAKE_VERSION(3, 0, 0), .name = "__opencl_c_images"});
214         info.OpenCL_C_Features.push_back(cl_name_version{.version = CL_MAKE_VERSION(3, 0, 0),
215                                                          .name    = "__opencl_c_3d_image_writes"});
216         info.OpenCL_C_Features.push_back(cl_name_version{.version = CL_MAKE_VERSION(3, 0, 0),
217                                                          .name = "__opencl_c_read_write_images"});
218     }
219     if (mRenderer->getEnabledFeatures().features.shaderInt64)
220     {
221         info.OpenCL_C_Features.push_back(
222             cl_name_version{.version = CL_MAKE_VERSION(3, 0, 0), .name = "__opencl_c_int64"});
223     }
224 
225     return info;
226 }
227 
getInfoUInt(cl::DeviceInfo name,cl_uint * value) const228 angle::Result CLDeviceVk::getInfoUInt(cl::DeviceInfo name, cl_uint *value) const
229 {
230     if (mInfoUInt.count(name))
231     {
232         *value = mInfoUInt.at(name);
233         return angle::Result::Continue;
234     }
235     ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
236 }
237 
getInfoULong(cl::DeviceInfo name,cl_ulong * value) const238 angle::Result CLDeviceVk::getInfoULong(cl::DeviceInfo name, cl_ulong *value) const
239 {
240     if (mInfoULong.count(name))
241     {
242         *value = mInfoULong.at(name);
243         return angle::Result::Continue;
244     }
245     ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
246 }
247 
getInfoSizeT(cl::DeviceInfo name,size_t * value) const248 angle::Result CLDeviceVk::getInfoSizeT(cl::DeviceInfo name, size_t *value) const
249 {
250     if (mInfoSizeT.count(name))
251     {
252         *value = mInfoSizeT.at(name);
253         return angle::Result::Continue;
254     }
255     ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
256 }
257 
getInfoStringLength(cl::DeviceInfo name,size_t * value) const258 angle::Result CLDeviceVk::getInfoStringLength(cl::DeviceInfo name, size_t *value) const
259 {
260     if (mInfoString.count(name))
261     {
262         *value = mInfoString.at(name).length() + 1;
263         return angle::Result::Continue;
264     }
265     ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
266 }
267 
getInfoString(cl::DeviceInfo name,size_t size,char * value) const268 angle::Result CLDeviceVk::getInfoString(cl::DeviceInfo name, size_t size, char *value) const
269 {
270     if (mInfoString.count(name))
271     {
272         std::strcpy(value, mInfoString.at(name).c_str());
273         return angle::Result::Continue;
274     }
275     ANGLE_CL_RETURN_ERROR(CL_INVALID_VALUE);
276 }
277 
createSubDevices(const cl_device_partition_property * properties,cl_uint numDevices,CreateFuncs & subDevices,cl_uint * numDevicesRet)278 angle::Result CLDeviceVk::createSubDevices(const cl_device_partition_property *properties,
279                                            cl_uint numDevices,
280                                            CreateFuncs &subDevices,
281                                            cl_uint *numDevicesRet)
282 {
283     UNIMPLEMENTED();
284     ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
285 }
286 
// Picks a local work-group size for |ndrange|.
//
// Starting from {1, 1, 1}, each of the first ndrange.workDimensions dimensions is doubled in
// turn for as long as (a) that dimension stays within the Vulkan per-dimension work-group size
// limit and (b) the total number of work-items stays within the smaller of the device's
// MaxWorkGroupSize and 64. Dimensions beyond ndrange.workDimensions stay at 1.
cl::WorkgroupSize CLDeviceVk::selectWorkGroupSize(const cl::NDRange &ndrange) const
{
    // Limit total work-group size to the Vulkan device's limit
    const VkPhysicalDeviceProperties &props = mRenderer->getPhysicalDeviceProperties();
    uint32_t maxSize = static_cast<uint32_t>(mInfoSizeT.at(cl::DeviceInfo::MaxWorkGroupSize));
    maxSize          = std::min(maxSize, 64u);

    bool keepIncreasing         = false;
    cl::WorkgroupSize localSize = {1, 1, 1};
    do
    {
        keepIncreasing = false;
        for (cl_uint i = 0; i < ndrange.workDimensions; i++)
        {
            cl::WorkgroupSize newLocalSize = localSize;
            newLocalSize[i] *= 2;

            // The local size is bounded per dimension by maxComputeWorkGroupSize (the same
            // limit createInfo() reports as maxWorkItemSizes); maxComputeWorkGroupCount limits
            // the number of work-groups in a dispatch, not the size of a work-group.
            if (newLocalSize[i] <= props.limits.maxComputeWorkGroupSize[i] &&
                newLocalSize[0] * newLocalSize[1] * newLocalSize[2] <= maxSize)
            {
                localSize      = newLocalSize;
                keepIncreasing = true;
            }
        }
    } while (keepIncreasing);

    return localSize;
}
315 
316 }  // namespace rx
317