• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // cl_types.h: Defines common types for the OpenCL support in ANGLE.
7 
8 #ifndef LIBANGLE_CLTYPES_H_
9 #define LIBANGLE_CLTYPES_H_
10 
11 #if defined(ANGLE_ENABLE_CL)
12 #    include "libANGLE/CLBitField.h"
13 #    include "libANGLE/CLRefPointer.h"
14 #    include "libANGLE/Debug.h"
15 #    include "libANGLE/angletypes.h"
16 
17 #    include "common/PackedCLEnums_autogen.h"
18 #    include "common/angleutils.h"
19 
20 // Include frequently used standard headers
21 #    include <algorithm>
22 #    include <array>
23 #    include <functional>
24 #    include <list>
25 #    include <memory>
26 #    include <string>
27 #    include <utility>
28 #    include <vector>
29 
30 namespace cl
31 {
32 
// Forward declarations of the cl:: classes referenced by the aliases below. Only the names are
// introduced here; the class definitions are not part of this header.
33 class Buffer;
34 class CommandQueue;
35 class Context;
36 class Device;
37 class Event;
38 class Image;
39 class Kernel;
40 class Memory;
41 class Object;
42 class Platform;
43 class Program;
44 class Sampler;
45 
// RefPointer<T> aliases, one per object type. Note that Image and Object are forward-declared above
// but get no pointer alias here.
46 using BufferPtr       = RefPointer<Buffer>;
47 using CommandQueuePtr = RefPointer<CommandQueue>;
48 using ContextPtr      = RefPointer<Context>;
49 using DevicePtr       = RefPointer<Device>;
50 using EventPtr        = RefPointer<Event>;
51 using KernelPtr       = RefPointer<Kernel>;
52 using MemoryPtr       = RefPointer<Memory>;
53 using PlatformPtr     = RefPointer<Platform>;
54 using ProgramPtr      = RefPointer<Program>;
55 using SamplerPtr      = RefPointer<Sampler>;
56 
// std::vector containers of the pointer aliases above (no list alias exists for CommandQueuePtr or
// ContextPtr).
57 using BufferPtrs   = std::vector<BufferPtr>;
58 using DevicePtrs   = std::vector<DevicePtr>;
59 using EventPtrs    = std::vector<EventPtr>;
60 using KernelPtrs   = std::vector<KernelPtr>;
61 using MemoryPtrs   = std::vector<MemoryPtr>;
62 using PlatformPtrs = std::vector<PlatformPtr>;
63 using ProgramPtrs  = std::vector<ProgramPtr>;
64 using SamplerPtrs  = std::vector<SamplerPtr>;
65 
// Three-component uint32_t arrays. NDRange below indexes them by work dimension (0..2).
66 using WorkgroupSize    = std::array<uint32_t, 3>;
67 using GlobalWorkOffset = std::array<uint32_t, 3>;
68 using GlobalWorkSize   = std::array<uint32_t, 3>;
69 using WorkgroupCount   = std::array<uint32_t, 3>;
70 
// Fixed array of three T values.
// NOTE(review): presumably one slot per event execution status - confirm against the users.
71 template <typename T>
72 using EventStatusMap = std::array<T, 3>;
73 
// size_t instantiations of the angle::Extents / angle::Offset templates, plus an all-zero offset.
74 using Extents = angle::Extents<size_t>;
75 using Offset  = angle::Offset<size_t>;
76 constexpr Offset kOffsetZero(0, 0, 0);
77 
// Plain record describing one kernel argument. It has no constructor, so the members are left
// uninitialized unless the creator initializes them explicitly.
// NOTE(review): the field meanings below are inferred from the names - confirm against the users.
78 struct KernelArg
79 {
80     bool isSet;  // presumably true once a value has been supplied for this argument
81     cl_uint index;  // presumably the argument's position in the kernel signature
82     size_t size;  // presumably the size of the data referenced by valuePtr
83     const void *valuePtr;  // non-owning pointer; nothing in this struct manages its lifetime
84 };
85 
86 struct BufferRect
87 {
88     BufferRect(const Offset &offset,
89                const Extents &size,
90                const size_t row_pitch,
91                const size_t slice_pitch,
92                const size_t element_size = 1)
mOriginBufferRect93         : mOrigin(offset),
94           mSize(size),
95           mRowPitch(row_pitch == 0 ? element_size * size.width : row_pitch),
96           mSlicePitch(slice_pitch == 0 ? mRowPitch * size.height : slice_pitch),
97           mElementSize(element_size)
98     {}
validBufferRect99     bool valid() const
100     {
101         return mSize.width != 0 && mSize.height != 0 && mSize.depth != 0 &&
102                mRowPitch >= mSize.width * mElementSize && mSlicePitch >= mRowPitch * mSize.height &&
103                mElementSize > 0;
104     }
105     bool operator==(const BufferRect &other) const
106     {
107         return (mOrigin == other.mOrigin && mSize == other.mSize && mRowPitch == other.mRowPitch &&
108                 mSlicePitch == other.mSlicePitch && mElementSize == other.mElementSize);
109     }
110     bool operator!=(const BufferRect &other) const { return !(*this == other); }
111 
getRowOffsetBufferRect112     size_t getRowOffset(size_t slice, size_t row) const
113     {
114         return ((mRowPitch * (mOrigin.y + row)) + (mOrigin.x * mElementSize)) +  // row offset
115                (mSlicePitch * (mOrigin.z + slice));                              // slice offset
116     }
117 
getRowPitchBufferRect118     size_t getRowPitch() { return mRowPitch; }
getSlicePitchBufferRect119     size_t getSlicePitch() { return mSlicePitch; }
120     Offset mOrigin;
121     Extents mSize;
122     size_t mRowPitch;
123     size_t mSlicePitch;
124     size_t mElementSize;
125 };
126 
127 struct ImageDescriptor
128 {
129     MemObjectType type;
130     size_t width;
131     size_t height;
132     size_t depth;
133     size_t arraySize;
134     size_t rowPitch;
135     size_t slicePitch;
136     cl_uint numMipLevels;
137     cl_uint numSamples;
138 
ImageDescriptorImageDescriptor139     ImageDescriptor(MemObjectType type_,
140                     size_t width_,
141                     size_t height_,
142                     size_t depth_,
143                     size_t arraySize_,
144                     size_t rowPitch_,
145                     size_t slicePitch_,
146                     cl_uint numMipLevels_,
147                     cl_uint numSamples_)
148         : type(type_),
149           width(width_),
150           height(height_),
151           depth(depth_),
152           arraySize(arraySize_),
153           rowPitch(rowPitch_),
154           slicePitch(slicePitch_),
155           numMipLevels(numMipLevels_),
156           numSamples(numSamples_)
157     {
158         if (type == MemObjectType::Image1D || type == MemObjectType::Image1D_Array ||
159             type == MemObjectType::Image1D_Buffer)
160         {
161             depth  = 1;
162             height = 1;
163         }
164         if (type == MemObjectType::Image2D || type == MemObjectType::Image2D_Array)
165         {
166             depth = 1;
167         }
168         if (!(type == MemObjectType::Image1D_Array || type == MemObjectType::Image2D_Array))
169         {
170             arraySize = 1;
171         }
172     }
173 };
174 
// Aggregate holding three size_t offsets, initialized in x, y, z order.
struct MemOffsets
{
    size_t x;
    size_t y;
    size_t z;
};
// MemOffsets value with all three components set to zero.
constexpr MemOffsets kMemOffsetsZero = {0, 0, 0};
180 
// Aggregate holding three size_t coordinates, initialized in x, y, z order.
struct Coordinate
{
    size_t x;
    size_t y;
    size_t z;
};
// Coordinate value with all three components set to zero.
constexpr Coordinate kCoordinateZero = {0, 0, 0};
186 
187 struct NDRange
188 {
NDRangeNDRange189     NDRange(cl_uint workDimensionsIn,
190             const size_t *globalWorkOffsetIn,
191             const size_t *globalWorkSizeIn,
192             const size_t *localWorkSizeIn)
193         : workDimensions(workDimensionsIn),
194           globalWorkOffset({0, 0, 0}),
195           globalWorkSize({1, 1, 1}),
196           localWorkSize({1, 1, 1}),
197           nullLocalWorkSize(localWorkSizeIn == nullptr)
198     {
199         for (cl_uint dim = 0; dim < workDimensionsIn; dim++)
200         {
201             if (globalWorkOffsetIn != nullptr)
202             {
203                 ASSERT(!(static_cast<uint32_t>((globalWorkOffsetIn[dim] + globalWorkSizeIn[dim])) <
204                          globalWorkOffsetIn[dim]));
205                 globalWorkOffset[dim] = static_cast<uint32_t>(globalWorkOffsetIn[dim]);
206             }
207             if (globalWorkSizeIn != nullptr)
208             {
209                 ASSERT(globalWorkSizeIn[dim] <= UINT32_MAX);
210                 globalWorkSize[dim] = static_cast<uint32_t>(globalWorkSizeIn[dim]);
211             }
212             if (localWorkSizeIn != nullptr)
213             {
214                 ASSERT(localWorkSizeIn[dim] <= UINT32_MAX);
215                 localWorkSize[dim] = static_cast<uint32_t>(localWorkSizeIn[dim]);
216             }
217         }
218     }
219 
getWorkgroupCountNDRange220     cl::WorkgroupCount getWorkgroupCount() const
221     {
222         ASSERT(localWorkSize[0] > 0 && localWorkSize[1] > 0 && localWorkSize[2] > 0);
223         return cl::WorkgroupCount{rx::UnsignedCeilDivide(globalWorkSize[0], localWorkSize[0]),
224                                   rx::UnsignedCeilDivide(globalWorkSize[1], localWorkSize[1]),
225                                   rx::UnsignedCeilDivide(globalWorkSize[2], localWorkSize[2])};
226     }
227 
isUniformNDRange228     bool isUniform() const
229     {
230         for (cl_uint dim = 0; dim < workDimensions; dim++)
231         {
232             if (globalWorkSize[dim] % localWorkSize[dim] != 0)
233             {
234                 return false;
235             }
236         }
237         return true;
238     }
239 
createUniformRegionsNDRange240     std::vector<NDRange> createUniformRegions(
241         const std::array<uint32_t, 3> maxComputeWorkGroupCount) const
242     {
243         std::vector<NDRange> regions;
244         regions.push_back(*this);
245         regions.front().globalWorkOffset = {0};
246         uint32_t regionCount             = 1;
247         for (uint32_t regionPos = 0; regionPos < regionCount; ++regionPos)
248         {
249             // "Work-group sizes could be non-uniform in multiple dimensions, potentially producing
250             // work-groups of up to 4 different sizes in a 2D range and 8 different sizes in a 3D
251             // range."
252             // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_mapping_work_items_onto_an_nd_range
253             ASSERT(regionPos < 8);
254 
255             for (uint32_t dim = 0; dim < workDimensions; dim++)
256             {
257                 NDRange &region    = regions.at(regionPos);
258                 uint32_t remainder = region.globalWorkSize[dim] % region.localWorkSize[dim];
259                 if (remainder != 0)
260                 {
261                     // Split the range along this dimension. The original range's global work size
262                     // (e.g. 19) is clipped to a multiple of the local work size (e.g. 8). A new
263                     // range is added for the remainder (in this example 3) where the global and
264                     // local work sizes are identical to the remainder (i.e. it's also a uniform
265                     // range).
266                     NDRange newRegion(region);
267                     newRegion.globalWorkSize[dim] = newRegion.localWorkSize[dim] = remainder;
268                     region.globalWorkSize[dim] = newRegion.globalWorkOffset[dim] =
269                         (region.globalWorkSize[dim] - remainder);
270                     regions.push_back(newRegion);
271                     regionCount++;
272                 }
273             }
274         }
275         // Break into uniform regions that fit into given maxComputeWorkGroupCount (if needed)
276         uint32_t limitRegionCount = 1;
277         std::vector<NDRange> regionsWithinDeviceLimits;
278         for (const auto &region : regions)
279         {
280             regionsWithinDeviceLimits.push_back(region);
281             for (uint32_t regionPos = 0; regionPos < limitRegionCount; ++regionPos)
282             {
283                 NDRange &currentRegion = regionsWithinDeviceLimits.at(regionPos);
284                 for (uint32_t dim = 0; dim < workDimensions; dim++)
285                 {
286                     uint32_t maxGwsForRegion = gl::clampCast<uint32_t, uint64_t>(
287                         static_cast<uint64_t>(maxComputeWorkGroupCount[dim]) *
288                         static_cast<uint64_t>(currentRegion.localWorkSize[dim]));
289 
290                     if (currentRegion.globalWorkSize[dim] > maxGwsForRegion)
291                     {
292                         uint32_t remainderGws = currentRegion.globalWorkSize[dim] - maxGwsForRegion;
293                         if (remainderGws > 0)
294                         {
295                             NDRange remainderRegion             = currentRegion;
296                             remainderRegion.globalWorkSize[dim] = remainderGws;
297                             remainderRegion.globalWorkOffset[dim] =
298                                 currentRegion.globalWorkOffset[dim] +
299                                 (currentRegion.globalWorkSize[dim] - remainderGws);
300                             currentRegion.globalWorkSize[dim] = maxGwsForRegion;
301                             regionsWithinDeviceLimits.push_back(remainderRegion);
302                             limitRegionCount++;
303                         }
304                     }
305                 }
306             }
307         }
308         return regionsWithinDeviceLimits;
309     }
310 
311     cl_uint workDimensions;
312     GlobalWorkOffset globalWorkOffset;
313     GlobalWorkSize globalWorkSize;
314     WorkgroupSize localWorkSize;
315     bool nullLocalWorkSize{false};
316 };
317 
318 }  // namespace cl
319 
320 #endif  // ANGLE_ENABLE_CL
321 
322 #endif  // LIBANGLE_CLTYPES_H_
323