1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include <algorithm>
24 #include <unistd.h>
25 #include "core/device.hpp"
26 #include "core/platform.hpp"
27 #include "pipe/p_screen.h"
28 #include "pipe/p_state.h"
29 #include "util/bitscan.h"
30 #include "util/u_debug.h"
31 #include "spirv/invocation.hpp"
32 #include "nir/invocation.hpp"
33 #include <fstream>
34
35 using namespace clover;
36
37 namespace {
38 template<typename T>
39 std::vector<T>
get_compute_param(pipe_screen * pipe,pipe_shader_ir ir_format,pipe_compute_cap cap)40 get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format,
41 pipe_compute_cap cap) {
42 int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
43 std::vector<T> v(sz / sizeof(T));
44
45 pipe->get_compute_param(pipe, ir_format, cap, &v.front());
46 return v;
47 }
48 }
49
// Construct a clover device for the given pipe loader device.  Creates and
// takes ownership of the pipe screen; throws CL_INVALID_DEVICE if the
// driver lacks compute support or doesn't expose a usable shader IR.
device::device(clover::platform &platform, pipe_loader_device *ldev) :
   platform(platform), clc_cache(NULL), ldev(ldev) {
   pipe = pipe_loader_create_screen(ldev);
   if (pipe && pipe->get_param(pipe, PIPE_CAP_COMPUTE)) {
      // Drivers consuming native IR need no further setup.
      if (supports_ir(PIPE_SHADER_IR_NATIVE))
         return;
#ifdef HAVE_CLOVER_SPIRV
      if (supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED)) {
         nir::check_for_libclc(*this);
         clc_cache = nir::create_clc_disk_cache();
         // Defer loading the libclc NIR until it is first needed.
         clc_nir = lazy<std::shared_ptr<nir_shader>>([&] () { std::string log; return std::shared_ptr<nir_shader>(nir::load_libclc_nir(*this, log), ralloc_free); });
         return;
      }
#endif
   }
   // No usable compute support: release the screen and report failure.
   if (pipe)
      pipe->destroy(pipe);
   throw error(CL_INVALID_DEVICE);
}
69
device::~device() {
   // Tear down in reverse order of acquisition: disk cache, then the pipe
   // screen, and finally release the loader device reference.
   if (clc_cache)
      disk_cache_destroy(clc_cache);
   if (pipe)
      pipe->destroy(pipe);
   if (ldev)
      pipe_loader_release(&ldev, 1);
}
78
79 bool
operator ==(const device & dev) const80 device::operator==(const device &dev) const {
81 return this == &dev;
82 }
83
84 cl_device_type
type() const85 device::type() const {
86 switch (ldev->type) {
87 case PIPE_LOADER_DEVICE_SOFTWARE:
88 return CL_DEVICE_TYPE_CPU;
89 case PIPE_LOADER_DEVICE_PCI:
90 case PIPE_LOADER_DEVICE_PLATFORM:
91 return CL_DEVICE_TYPE_GPU;
92 default:
93 unreachable("Unknown device type.");
94 }
95 }
96
97 cl_uint
vendor_id() const98 device::vendor_id() const {
99 switch (ldev->type) {
100 case PIPE_LOADER_DEVICE_SOFTWARE:
101 case PIPE_LOADER_DEVICE_PLATFORM:
102 return 0;
103 case PIPE_LOADER_DEVICE_PCI:
104 return ldev->u.pci.vendor_id;
105 default:
106 unreachable("Unknown device type.");
107 }
108 }
109
// Maximum number of image objects a kernel can read (bounded by the
// number of sampler views the gallium driver supports).
size_t
device::max_images_read() const {
   return PIPE_MAX_SHADER_SAMPLER_VIEWS;
}
114
// Maximum number of image objects a kernel can write (bounded by the
// number of shader images the gallium driver supports).
size_t
device::max_images_write() const {
   return PIPE_MAX_SHADER_IMAGES;
}
119
// Maximum size of a buffer image, taken from the driver's texture-buffer
// size cap.
size_t
device::max_image_buffer_size() const {
   return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE);
}
124
// Derive the number of 2D mip levels from the driver's maximum 2D texture
// size (position of the highest set bit, i.e. log2(size) + 1).
cl_uint
device::max_image_levels_2d() const {
   return util_last_bit(pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_SIZE));
}
129
// Maximum number of 3D texture mip levels reported by the driver.
cl_uint
device::max_image_levels_3d() const {
   return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_3D_LEVELS);
}
134
// Maximum number of layers in an image array, from the driver's
// texture-array-layers cap.
size_t
device::max_image_array_number() const {
   return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS);
}
139
// Maximum number of samplers usable from a compute kernel.
cl_uint
device::max_samplers() const {
   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
                                 PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
}
145
// Total amount of global memory available to compute kernels, in bytes.
cl_ulong
device::max_mem_global() const {
   return get_compute_param<uint64_t>(pipe, ir_format(),
                                      PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
}
151
// Amount of local (work-group shared) memory available, in bytes.
cl_ulong
device::max_mem_local() const {
   return get_compute_param<uint64_t>(pipe, ir_format(),
                                      PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
}
157
// Maximum size of the kernel input (argument) area, in bytes.
cl_ulong
device::max_mem_input() const {
   return get_compute_param<uint64_t>(pipe, ir_format(),
                                      PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
}
163
// Maximum size of a single constant buffer, in bytes.
cl_ulong
device::max_const_buffer_size() const {
   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
                                 PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE);
}
169
// Maximum number of constant buffers bindable to a compute shader.
cl_uint
device::max_const_buffers() const {
   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
                                 PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
}
175
// Maximum total number of work items in a single work group.
size_t
device::max_threads_per_block() const {
   return get_compute_param<uint64_t>(
      pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
}
181
// Largest single memory object allocation supported, in bytes.
cl_ulong
device::max_mem_alloc_size() const {
   return get_compute_param<uint64_t>(pipe, ir_format(),
                                      PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
}
187
// Maximum device clock frequency as reported by the driver (MHz per the
// compute cap's contract — confirm against the gallium docs).
cl_uint
device::max_clock_frequency() const {
   return get_compute_param<uint32_t>(pipe, ir_format(),
                                      PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
}
193
// Number of parallel compute units on the device.
cl_uint
device::max_compute_units() const {
   return get_compute_param<uint32_t>(pipe, ir_format(),
                                      PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
}
199
// Whether the driver supports images in compute kernels.
bool
device::image_support() const {
   return get_compute_param<uint32_t>(pipe, ir_format(),
                                      PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
}
205
// Whether the driver supports double-precision floating point.
bool
device::has_doubles() const {
   return pipe->get_param(pipe, PIPE_CAP_DOUBLES);
}
210
// Whether the compute stage supports half-precision (fp16) arithmetic.
bool
device::has_halves() const {
   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
                                 PIPE_SHADER_CAP_FP16);
}
216
// Whether the compute stage supports 64-bit integer atomics.
bool
device::has_int64_atomics() const {
   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
                                 PIPE_SHADER_CAP_INT64_ATOMICS);
}
222
// Whether host and device share the same physical memory (UMA).
bool
device::has_unified_memory() const {
   return pipe->get_param(pipe, PIPE_CAP_UMA);
}
227
228 size_t
mem_base_addr_align() const229 device::mem_base_addr_align() const {
230 return std::max((size_t)sysconf(_SC_PAGESIZE), sizeof(cl_long) * 16);
231 }
232
233 cl_device_svm_capabilities
svm_support() const234 device::svm_support() const {
235 // Without CAP_RESOURCE_FROM_USER_MEMORY SVM and CL_MEM_USE_HOST_PTR
236 // interactions won't work according to spec as clover manages a GPU side
237 // copy of the host data.
238 //
239 // The biggest problem are memory buffers created with CL_MEM_USE_HOST_PTR,
240 // but the application and/or the kernel updates the memory via SVM and not
241 // the cl_mem buffer.
242 // We can't even do proper tracking on what memory might have been accessed
243 // as the host ptr to the buffer could be within a SVM region, where through
244 // the CL API there is no reliable way of knowing if a certain cl_mem buffer
245 // was accessed by a kernel or not and the runtime can't reliably know from
246 // which side the GPU buffer content needs to be updated.
247 //
248 // Another unsolvable scenario is a cl_mem object passed by cl_mem reference
249 // and SVM pointer into the same kernel at the same time.
250 if (allows_user_pointers() && pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM))
251 // we can emulate all lower levels if we support fine grain system
252 return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
253 CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
254 CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
255 return 0;
256 }
257
258 bool
allows_user_pointers() const259 device::allows_user_pointers() const {
260 return pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) ||
261 pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY_COMPUTE_ONLY);
262 }
263
// Maximum work-group size per dimension, as a vector of sizes.
std::vector<size_t>
device::max_block_size() const {
   auto v = get_compute_param<uint64_t>(pipe, ir_format(),
                                        PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
   // Narrow from uint64_t to size_t for the CL API.
   return { v.begin(), v.end() };
}
270
// The device's subgroup (wavefront/warp) size.
cl_uint
device::subgroup_size() const {
   return get_compute_param<uint32_t>(pipe, ir_format(),
                                      PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
}
276
// Width of the device address space in bits (e.g. 32 or 64).
cl_uint
device::address_bits() const {
   return get_compute_param<uint32_t>(pipe, ir_format(),
                                      PIPE_COMPUTE_CAP_ADDRESS_BITS)[0];
}
282
// Human-readable device name from the driver.
std::string
device::device_name() const {
   return pipe->get_name(pipe);
}
287
// Device vendor name from the driver.
std::string
device::vendor_name() const {
   return pipe->get_device_vendor(pipe);
}
292
// Select the shader IR to feed the driver: native IR is preferred,
// otherwise serialized NIR must be supported (asserted).
enum pipe_shader_ir
device::ir_format() const {
   if (supports_ir(PIPE_SHADER_IR_NATIVE))
      return PIPE_SHADER_IR_NATIVE;

   assert(supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED));
   return PIPE_SHADER_IR_NIR_SERIALIZED;
}
301
// Target triple/name for the device's IR.  The driver is expected to
// return a NUL-terminated string in the cap data — the std::string is
// built from the data pointer, not the full vector length.
std::string
device::ir_target() const {
   std::vector<char> target = get_compute_param<char>(
      pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
   return { target.data() };
}
308
// Byte order of the device, as reported by the driver.
enum pipe_endian
device::endianness() const {
   return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS);
}
313
// OpenCL version string exposed by the device; defaults to "1.1" and can
// be overridden via the CLOVER_DEVICE_VERSION_OVERRIDE environment
// variable.  Cached on first call.
std::string
device::device_version() const {
   static const std::string device_version =
      debug_get_option("CLOVER_DEVICE_VERSION_OVERRIDE", "1.1");
   return device_version;
}
320
// OpenCL C language version exposed by the device; defaults to "1.1" and
// can be overridden via CLOVER_DEVICE_CLC_VERSION_OVERRIDE.  Cached on
// first call.
std::string
device::device_clc_version() const {
   static const std::string device_clc_version =
      debug_get_option("CLOVER_DEVICE_CLC_VERSION_OVERRIDE", "1.1");
   return device_clc_version;
}
327
// Whether the driver accepts the given shader IR for compute shaders.
// The cap is a bitmask indexed by pipe_shader_ir values.
bool
device::supports_ir(enum pipe_shader_ir ir) const {
   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
                                 PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << ir);
}
333
334 std::string
supported_extensions() const335 device::supported_extensions() const {
336 return
337 "cl_khr_byte_addressable_store"
338 " cl_khr_global_int32_base_atomics"
339 " cl_khr_global_int32_extended_atomics"
340 " cl_khr_local_int32_base_atomics"
341 " cl_khr_local_int32_extended_atomics"
342 + std::string(has_int64_atomics() ? " cl_khr_int64_base_atomics" : "")
343 + std::string(has_int64_atomics() ? " cl_khr_int64_extended_atomics" : "")
344 + std::string(has_doubles() ? " cl_khr_fp64" : "")
345 + std::string(has_halves() ? " cl_khr_fp16" : "")
346 + std::string(svm_support() ? " cl_arm_shared_virtual_memory" : "");
347 }
348
// Opaque compiler options for the given IR, forwarded from the driver
// (presumably a nir_shader_compiler_options* for NIR — confirm at caller).
const void *
device::get_compiler_options(enum pipe_shader_ir ir) const {
   return pipe->get_compiler_options(pipe, ir, PIPE_SHADER_COMPUTE);
}
353