1 /*
2 * Copyright (c) 2020-2022 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "arm_compute/core/CL/CLCompileContext.h"
25 #include "arm_compute/core/CL/OpenCL.h"
26
27 #include "arm_compute/core/CL/CLHelpers.h"
28 #include "arm_compute/core/Error.h"
29 #include "arm_compute/core/Utils.h"
30 #include "support/StringSupport.h"
31
32 #include <regex>
33
34 namespace arm_compute
35 {
CLBuildOptions()36 CLBuildOptions::CLBuildOptions()
37 : _build_opts()
38 {
39 }
40
add_option(std::string option)41 void CLBuildOptions::add_option(std::string option)
42 {
43 _build_opts.emplace(std::move(option));
44 }
45
add_option_if(bool cond,std::string option)46 void CLBuildOptions::add_option_if(bool cond, std::string option)
47 {
48 if(cond)
49 {
50 add_option(std::move(option));
51 }
52 }
53
add_option_if_else(bool cond,std::string option_true,std::string option_false)54 void CLBuildOptions::add_option_if_else(bool cond, std::string option_true, std::string option_false)
55 {
56 (cond) ? add_option(std::move(option_true)) : add_option(std::move(option_false));
57 }
58
add_options(const StringSet & options)59 void CLBuildOptions::add_options(const StringSet &options)
60 {
61 _build_opts.insert(options.begin(), options.end());
62 }
63
add_options_if(bool cond,const StringSet & options)64 void CLBuildOptions::add_options_if(bool cond, const StringSet &options)
65 {
66 if(cond)
67 {
68 add_options(options);
69 }
70 }
71
options() const72 const CLBuildOptions::StringSet &CLBuildOptions::options() const
73 {
74 return _build_opts;
75 }
76
operator ==(const CLBuildOptions & other) const77 bool CLBuildOptions::operator==(const CLBuildOptions &other) const
78 {
79 return _build_opts == other._build_opts;
80 }
81
Program()82 Program::Program()
83 : _context(), _device(), _is_binary(false), _name(), _source(), _binary()
84 {
85 }
86
Program(cl::Context context,std::string name,std::string source)87 Program::Program(cl::Context context, std::string name, std::string source)
88 : _context(std::move(context)), _device(), _is_binary(false), _name(std::move(name)), _source(std::move(source)), _binary()
89 {
90 }
91
Program(cl::Context context,cl::Device device,std::string name,std::vector<unsigned char> binary)92 Program::Program(cl::Context context, cl::Device device, std::string name, std::vector<unsigned char> binary)
93 : _context(std::move(context)), _device(std::move(device)), _is_binary(true), _name(std::move(name)), _source(), _binary(std::move(binary))
94 {
95 }
96
operator cl::Program() const97 Program::operator cl::Program() const
98 {
99 if(_is_binary)
100 {
101 return cl::Program(_context, { _device }, { _binary });
102 }
103 else
104 {
105 return cl::Program(_context, _source, false);
106 }
107 }
108
build(const cl::Program & program,const std::string & build_options)109 bool Program::build(const cl::Program &program, const std::string &build_options)
110 {
111 try
112 {
113 return program.build(build_options.c_str()) == CL_SUCCESS;
114 }
115 catch(const cl::Error &e)
116 {
117 cl_int err = CL_SUCCESS;
118 const auto build_info = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&err);
119
120 for(auto &pair : build_info)
121 {
122 std::cerr << pair.second << std::endl;
123 }
124
125 return false;
126 }
127 }
128
build(const std::string & build_options) const129 cl::Program Program::build(const std::string &build_options) const
130 {
131 cl::Program cl_program = static_cast<cl::Program>(*this);
132 build(cl_program, build_options);
133 return cl_program;
134 }
135
Kernel()136 Kernel::Kernel()
137 : _name(), _kernel()
138 {
139 }
140
Kernel(std::string name,const cl::Program & program)141 Kernel::Kernel(std::string name, const cl::Program &program)
142 : _name(std::move(name)),
143 _kernel(cl::Kernel(program, _name.c_str()))
144 {
145 }
CLCompileContext()146 CLCompileContext::CLCompileContext()
147 : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
148 {
149 }
150
CLCompileContext(cl::Context context,const cl::Device & device)151 CLCompileContext::CLCompileContext(cl::Context context, const cl::Device &device)
152 : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
153 {
154 _context = std::move(context);
155 _device = CLDevice(device);
156 _is_wbsm_supported = get_wbsm_support_info(device);
157 }
158
create_kernel(const std::string & kernel_name,const std::string & program_name,const std::string & program_source,const std::string & kernel_path,const StringSet & build_options_set,bool is_binary) const159 Kernel CLCompileContext::create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
160 const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
161 {
162 const std::string build_options = generate_build_options(build_options_set, kernel_path);
163 const std::string built_program_name = program_name + "_" + build_options;
164 auto built_program_it = _built_programs_map.find(built_program_name);
165 cl::Program cl_program;
166
167 if(_built_programs_map.end() != built_program_it)
168 {
169 // If program has been built, retrieve to create kernel from it
170 cl_program = built_program_it->second;
171 }
172 else
173 {
174 Program program = load_program(program_name, program_source, is_binary);
175
176 // Build program
177 cl_program = program.build(build_options);
178
179 // Add built program to internal map
180 _built_programs_map.emplace(built_program_name, cl_program);
181 }
182
183 // Create and return kernel
184 return Kernel(kernel_name, cl_program);
185 }
186
load_program(const std::string & program_name,const std::string & program_source,bool is_binary) const187 const Program &CLCompileContext::load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const
188 {
189 const auto program_it = _programs_map.find(program_name);
190
191 if(program_it != _programs_map.end())
192 {
193 return program_it->second;
194 }
195
196 Program program;
197
198 #ifdef EMBEDDED_KERNELS
199 ARM_COMPUTE_UNUSED(is_binary);
200 program = Program(_context, program_name, program_source);
201 #else /* EMBEDDED_KERNELS */
202 if(is_binary)
203 {
204 program = Program(_context, _device.cl_device(), program_name, std::vector<unsigned char>(program_source.begin(), program_source.end()));
205 }
206 else
207 {
208 program = Program(_context, program_name, program_source);
209 }
210 #endif /* EMBEDDED_KERNELS */
211
212 // Insert program to program map
213 const auto new_program = _programs_map.emplace(program_name, std::move(program));
214
215 return new_program.first->second;
216 }
217
set_context(cl::Context context)218 void CLCompileContext::set_context(cl::Context context)
219 {
220 _context = std::move(context);
221 if(_context.get() != nullptr)
222 {
223 const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
224
225 if(!cl_devices.empty())
226 {
227 _device = CLDevice(cl_devices[0]);
228 }
229 }
230 }
231
generate_build_options(const StringSet & build_options_set,const std::string & kernel_path) const232 std::string CLCompileContext::generate_build_options(const StringSet &build_options_set, const std::string &kernel_path) const
233 {
234 std::string concat_str;
235 bool ext_supported = false;
236 std::string ext_buildopts;
237
238 #if defined(ARM_COMPUTE_DEBUG_ENABLED)
239 // Enable debug properties in CL kernels
240 concat_str += " -DARM_COMPUTE_DEBUG_ENABLED";
241 #endif // defined(ARM_COMPUTE_DEBUG_ENABLED)
242
243 GPUTarget gpu_arch = get_arch_from_target(_device.target());
244 concat_str += " -DGPU_ARCH=" + support::cpp11::to_string(
245 static_cast<std::underlying_type<GPUTarget>::type>(gpu_arch));
246
247 if(_device.supported("cl_khr_fp16"))
248 {
249 concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
250 }
251
252 if(_device.supported("cl_arm_integer_dot_product_int8") || _device.supported("cl_khr_integer_dot_product"))
253 {
254 concat_str += " -DARM_COMPUTE_OPENCL_DOT8_ENABLED=1 ";
255 }
256
257 if(_device.supported("cl_arm_integer_dot_product_accumulate_int8"))
258 {
259 concat_str += " -DARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED=1 ";
260 }
261
262 std::tie(ext_supported, ext_buildopts) = _device.is_non_uniform_workgroup_supported();
263
264 if(ext_supported)
265 {
266 concat_str += ext_buildopts;
267 }
268 else
269 {
270 ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
271 }
272
273 if(gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD && get_ddk_version() >= 11)
274 {
275 concat_str += " -DUNROLL_WITH_PRAGMA ";
276 }
277
278 std::string build_options = stringify_set(build_options_set, kernel_path) + concat_str;
279
280 return build_options;
281 }
282
fp16_supported() const283 bool CLCompileContext::fp16_supported() const
284 {
285 return _device.supported("cl_khr_fp16");
286 }
287
stringify_set(const StringSet & s,const std::string & kernel_path) const288 std::string CLCompileContext::stringify_set(const StringSet &s, const std::string &kernel_path) const
289 {
290 std::string concat_set;
291 #ifndef EMBEDDED_KERNELS
292 concat_set += "-I" + kernel_path + " ";
293 #else /* EMBEDDED_KERNELS */
294 ARM_COMPUTE_UNUSED(kernel_path);
295 #endif /* EMBEDDED_KERNELS */
296
297 // Concatenate set
298 for(const auto &el : s)
299 {
300 concat_set += " " + el;
301 }
302
303 return concat_set;
304 }
305
add_built_program(const std::string & built_program_name,const cl::Program & program) const306 void CLCompileContext::add_built_program(const std::string &built_program_name, const cl::Program &program) const
307 {
308 _built_programs_map.emplace(built_program_name, program);
309 }
310
clear_programs_cache()311 void CLCompileContext::clear_programs_cache()
312 {
313 _programs_map.clear();
314 _built_programs_map.clear();
315 }
316
get_built_programs() const317 const std::map<std::string, cl::Program> &CLCompileContext::get_built_programs() const
318 {
319 return _built_programs_map;
320 }
321
context()322 cl::Context &CLCompileContext::context()
323 {
324 return _context;
325 }
326
get_device() const327 const cl::Device &CLCompileContext::get_device() const
328 {
329 return _device.cl_device();
330 }
331
set_device(cl::Device device)332 void CLCompileContext::set_device(cl::Device device)
333 {
334 _device = std::move(device);
335 _is_wbsm_supported = get_wbsm_support_info(device);
336 }
337
default_ndrange() const338 cl::NDRange CLCompileContext::default_ndrange() const
339 {
340 GPUTarget _target = get_target_from_device(_device.cl_device());
341 cl::NDRange default_range;
342
343 switch(_target)
344 {
345 case GPUTarget::MIDGARD:
346 case GPUTarget::T600:
347 case GPUTarget::T700:
348 case GPUTarget::T800:
349 default_range = cl::NDRange(128u, 1);
350 break;
351 default:
352 default_range = cl::NullRange;
353 }
354
355 return default_range;
356 }
357
int64_base_atomics_supported() const358 bool CLCompileContext::int64_base_atomics_supported() const
359 {
360 return _device.supported("cl_khr_int64_base_atomics");
361 }
362
is_wbsm_supported() const363 bool CLCompileContext::is_wbsm_supported() const
364 {
365 return _is_wbsm_supported;
366 }
367
max_local_workgroup_size(const cl::Kernel & kernel) const368 size_t CLCompileContext::max_local_workgroup_size(const cl::Kernel &kernel) const
369 {
370 size_t result;
371
372 size_t err = kernel.getWorkGroupInfo(_device.cl_device(), CL_KERNEL_WORK_GROUP_SIZE, &result);
373 ARM_COMPUTE_ERROR_ON_MSG(err != 0, "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
374 ARM_COMPUTE_UNUSED(err);
375
376 return result;
377 }
378
get_device_version() const379 std::string CLCompileContext::get_device_version() const
380 {
381 return _device.device_version();
382 }
383
get_num_compute_units() const384 cl_uint CLCompileContext::get_num_compute_units() const
385 {
386 return _device.compute_units();
387 }
388
get_ddk_version() const389 int32_t CLCompileContext::get_ddk_version() const
390 {
391 const std::string device_version = _device.device_version();
392 const std::regex ddk_regex("r([0-9]*)p[0-9]");
393 std::smatch ddk_match;
394
395 if(std::regex_search(device_version, ddk_match, ddk_regex))
396 {
397 return std::stoi(ddk_match[1]);
398 }
399
400 return -1;
401 }
get_gpu_target() const402 GPUTarget CLCompileContext::get_gpu_target() const
403 {
404 return _device.target();
405 }
406 } // namespace arm_compute
407