• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2020-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/CL/CLCompileContext.h"
25 #include "arm_compute/core/CL/OpenCL.h"
26 
27 #include "arm_compute/core/CL/CLHelpers.h"
28 #include "arm_compute/core/Error.h"
29 #include "arm_compute/core/Utils.h"
30 #include "support/StringSupport.h"
31 
32 #include <regex>
33 
34 namespace arm_compute
35 {
CLBuildOptions()36 CLBuildOptions::CLBuildOptions()
37     : _build_opts()
38 {
39 }
40 
add_option(std::string option)41 void CLBuildOptions::add_option(std::string option)
42 {
43     _build_opts.emplace(std::move(option));
44 }
45 
add_option_if(bool cond,std::string option)46 void CLBuildOptions::add_option_if(bool cond, std::string option)
47 {
48     if(cond)
49     {
50         add_option(std::move(option));
51     }
52 }
53 
add_option_if_else(bool cond,std::string option_true,std::string option_false)54 void CLBuildOptions::add_option_if_else(bool cond, std::string option_true, std::string option_false)
55 {
56     (cond) ? add_option(std::move(option_true)) : add_option(std::move(option_false));
57 }
58 
add_options(const StringSet & options)59 void CLBuildOptions::add_options(const StringSet &options)
60 {
61     _build_opts.insert(options.begin(), options.end());
62 }
63 
add_options_if(bool cond,const StringSet & options)64 void CLBuildOptions::add_options_if(bool cond, const StringSet &options)
65 {
66     if(cond)
67     {
68         add_options(options);
69     }
70 }
71 
options() const72 const CLBuildOptions::StringSet &CLBuildOptions::options() const
73 {
74     return _build_opts;
75 }
76 
operator ==(const CLBuildOptions & other) const77 bool CLBuildOptions::operator==(const CLBuildOptions &other) const
78 {
79     return _build_opts == other._build_opts;
80 }
81 
Program()82 Program::Program()
83     : _context(), _device(), _is_binary(false), _name(), _source(), _binary()
84 {
85 }
86 
Program(cl::Context context,std::string name,std::string source)87 Program::Program(cl::Context context, std::string name, std::string source)
88     : _context(std::move(context)), _device(), _is_binary(false), _name(std::move(name)), _source(std::move(source)), _binary()
89 {
90 }
91 
Program(cl::Context context,cl::Device device,std::string name,std::vector<unsigned char> binary)92 Program::Program(cl::Context context, cl::Device device, std::string name, std::vector<unsigned char> binary)
93     : _context(std::move(context)), _device(std::move(device)), _is_binary(true), _name(std::move(name)), _source(), _binary(std::move(binary))
94 {
95 }
96 
operator cl::Program() const97 Program::operator cl::Program() const
98 {
99     if(_is_binary)
100     {
101         return cl::Program(_context, { _device }, { _binary });
102     }
103     else
104     {
105         return cl::Program(_context, _source, false);
106     }
107 }
108 
build(const cl::Program & program,const std::string & build_options)109 bool Program::build(const cl::Program &program, const std::string &build_options)
110 {
111     try
112     {
113         return program.build(build_options.c_str()) == CL_SUCCESS;
114     }
115     catch(const cl::Error &e)
116     {
117         cl_int     err        = CL_SUCCESS;
118         const auto build_info = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&err);
119 
120         for(auto &pair : build_info)
121         {
122             std::cerr << pair.second << std::endl;
123         }
124 
125         return false;
126     }
127 }
128 
build(const std::string & build_options) const129 cl::Program Program::build(const std::string &build_options) const
130 {
131     cl::Program cl_program = static_cast<cl::Program>(*this);
132     build(cl_program, build_options);
133     return cl_program;
134 }
135 
Kernel()136 Kernel::Kernel()
137     : _name(), _kernel()
138 {
139 }
140 
Kernel(std::string name,const cl::Program & program)141 Kernel::Kernel(std::string name, const cl::Program &program)
142     : _name(std::move(name)),
143       _kernel(cl::Kernel(program, _name.c_str()))
144 {
145 }
CLCompileContext()146 CLCompileContext::CLCompileContext()
147     : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
148 {
149 }
150 
CLCompileContext(cl::Context context,const cl::Device & device)151 CLCompileContext::CLCompileContext(cl::Context context, const cl::Device &device)
152     : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
153 {
154     _context           = std::move(context);
155     _device            = CLDevice(device);
156     _is_wbsm_supported = get_wbsm_support_info(device);
157 }
158 
create_kernel(const std::string & kernel_name,const std::string & program_name,const std::string & program_source,const std::string & kernel_path,const StringSet & build_options_set,bool is_binary) const159 Kernel CLCompileContext::create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
160                                        const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
161 {
162     const std::string build_options      = generate_build_options(build_options_set, kernel_path);
163     const std::string built_program_name = program_name + "_" + build_options;
164     auto              built_program_it   = _built_programs_map.find(built_program_name);
165     cl::Program       cl_program;
166 
167     if(_built_programs_map.end() != built_program_it)
168     {
169         // If program has been built, retrieve to create kernel from it
170         cl_program = built_program_it->second;
171     }
172     else
173     {
174         Program program = load_program(program_name, program_source, is_binary);
175 
176         // Build program
177         cl_program = program.build(build_options);
178 
179         // Add built program to internal map
180         _built_programs_map.emplace(built_program_name, cl_program);
181     }
182 
183     // Create and return kernel
184     return Kernel(kernel_name, cl_program);
185 }
186 
load_program(const std::string & program_name,const std::string & program_source,bool is_binary) const187 const Program &CLCompileContext::load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const
188 {
189     const auto program_it = _programs_map.find(program_name);
190 
191     if(program_it != _programs_map.end())
192     {
193         return program_it->second;
194     }
195 
196     Program program;
197 
198 #ifdef EMBEDDED_KERNELS
199     ARM_COMPUTE_UNUSED(is_binary);
200     program = Program(_context, program_name, program_source);
201 #else  /* EMBEDDED_KERNELS */
202     if(is_binary)
203     {
204         program = Program(_context, _device.cl_device(), program_name, std::vector<unsigned char>(program_source.begin(), program_source.end()));
205     }
206     else
207     {
208         program = Program(_context, program_name, program_source);
209     }
210 #endif /* EMBEDDED_KERNELS */
211 
212     // Insert program to program map
213     const auto new_program = _programs_map.emplace(program_name, std::move(program));
214 
215     return new_program.first->second;
216 }
217 
set_context(cl::Context context)218 void CLCompileContext::set_context(cl::Context context)
219 {
220     _context = std::move(context);
221     if(_context.get() != nullptr)
222     {
223         const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
224 
225         if(!cl_devices.empty())
226         {
227             _device = CLDevice(cl_devices[0]);
228         }
229     }
230 }
231 
generate_build_options(const StringSet & build_options_set,const std::string & kernel_path) const232 std::string CLCompileContext::generate_build_options(const StringSet &build_options_set, const std::string &kernel_path) const
233 {
234     std::string concat_str;
235     bool        ext_supported = false;
236     std::string ext_buildopts;
237 
238 #if defined(ARM_COMPUTE_DEBUG_ENABLED)
239     // Enable debug properties in CL kernels
240     concat_str += " -DARM_COMPUTE_DEBUG_ENABLED";
241 #endif // defined(ARM_COMPUTE_DEBUG_ENABLED)
242 
243     GPUTarget gpu_arch = get_arch_from_target(_device.target());
244     concat_str += " -DGPU_ARCH=" + support::cpp11::to_string(
245                       static_cast<std::underlying_type<GPUTarget>::type>(gpu_arch));
246 
247     if(_device.supported("cl_khr_fp16"))
248     {
249         concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
250     }
251 
252     if(_device.supported("cl_arm_integer_dot_product_int8") || _device.supported("cl_khr_integer_dot_product"))
253     {
254         concat_str += " -DARM_COMPUTE_OPENCL_DOT8_ENABLED=1 ";
255     }
256 
257     if(_device.supported("cl_arm_integer_dot_product_accumulate_int8"))
258     {
259         concat_str += " -DARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED=1 ";
260     }
261 
262     std::tie(ext_supported, ext_buildopts) = _device.is_non_uniform_workgroup_supported();
263 
264     if(ext_supported)
265     {
266         concat_str += ext_buildopts;
267     }
268     else
269     {
270         ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
271     }
272 
273     if(gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD && get_ddk_version() >= 11)
274     {
275         concat_str += " -DUNROLL_WITH_PRAGMA ";
276     }
277 
278     std::string build_options = stringify_set(build_options_set, kernel_path) + concat_str;
279 
280     return build_options;
281 }
282 
fp16_supported() const283 bool CLCompileContext::fp16_supported() const
284 {
285     return _device.supported("cl_khr_fp16");
286 }
287 
stringify_set(const StringSet & s,const std::string & kernel_path) const288 std::string CLCompileContext::stringify_set(const StringSet &s, const std::string &kernel_path) const
289 {
290     std::string concat_set;
291 #ifndef EMBEDDED_KERNELS
292     concat_set += "-I" + kernel_path + " ";
293 #else  /* EMBEDDED_KERNELS */
294     ARM_COMPUTE_UNUSED(kernel_path);
295 #endif /* EMBEDDED_KERNELS */
296 
297     // Concatenate set
298     for(const auto &el : s)
299     {
300         concat_set += " " + el;
301     }
302 
303     return concat_set;
304 }
305 
add_built_program(const std::string & built_program_name,const cl::Program & program) const306 void CLCompileContext::add_built_program(const std::string &built_program_name, const cl::Program &program) const
307 {
308     _built_programs_map.emplace(built_program_name, program);
309 }
310 
clear_programs_cache()311 void CLCompileContext::clear_programs_cache()
312 {
313     _programs_map.clear();
314     _built_programs_map.clear();
315 }
316 
get_built_programs() const317 const std::map<std::string, cl::Program> &CLCompileContext::get_built_programs() const
318 {
319     return _built_programs_map;
320 }
321 
context()322 cl::Context &CLCompileContext::context()
323 {
324     return _context;
325 }
326 
get_device() const327 const cl::Device &CLCompileContext::get_device() const
328 {
329     return _device.cl_device();
330 }
331 
set_device(cl::Device device)332 void CLCompileContext::set_device(cl::Device device)
333 {
334     _device            = std::move(device);
335     _is_wbsm_supported = get_wbsm_support_info(device);
336 }
337 
default_ndrange() const338 cl::NDRange CLCompileContext::default_ndrange() const
339 {
340     GPUTarget   _target = get_target_from_device(_device.cl_device());
341     cl::NDRange default_range;
342 
343     switch(_target)
344     {
345         case GPUTarget::MIDGARD:
346         case GPUTarget::T600:
347         case GPUTarget::T700:
348         case GPUTarget::T800:
349             default_range = cl::NDRange(128u, 1);
350             break;
351         default:
352             default_range = cl::NullRange;
353     }
354 
355     return default_range;
356 }
357 
int64_base_atomics_supported() const358 bool CLCompileContext::int64_base_atomics_supported() const
359 {
360     return _device.supported("cl_khr_int64_base_atomics");
361 }
362 
is_wbsm_supported() const363 bool CLCompileContext::is_wbsm_supported() const
364 {
365     return _is_wbsm_supported;
366 }
367 
max_local_workgroup_size(const cl::Kernel & kernel) const368 size_t CLCompileContext::max_local_workgroup_size(const cl::Kernel &kernel) const
369 {
370     size_t result;
371 
372     size_t err = kernel.getWorkGroupInfo(_device.cl_device(), CL_KERNEL_WORK_GROUP_SIZE, &result);
373     ARM_COMPUTE_ERROR_ON_MSG(err != 0, "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
374     ARM_COMPUTE_UNUSED(err);
375 
376     return result;
377 }
378 
get_device_version() const379 std::string CLCompileContext::get_device_version() const
380 {
381     return _device.device_version();
382 }
383 
get_num_compute_units() const384 cl_uint CLCompileContext::get_num_compute_units() const
385 {
386     return _device.compute_units();
387 }
388 
get_ddk_version() const389 int32_t CLCompileContext::get_ddk_version() const
390 {
391     const std::string device_version = _device.device_version();
392     const std::regex  ddk_regex("r([0-9]*)p[0-9]");
393     std::smatch       ddk_match;
394 
395     if(std::regex_search(device_version, ddk_match, ddk_regex))
396     {
397         return std::stoi(ddk_match[1]);
398     }
399 
400     return -1;
401 }
get_gpu_target() const402 GPUTarget CLCompileContext::get_gpu_target() const
403 {
404     return _device.target();
405 }
406 } // namespace arm_compute
407