• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLTUNER_H
25 #define ARM_COMPUTE_CLTUNER_H
26 
27 #include "arm_compute/core/CL/OpenCL.h"
28 #include "arm_compute/runtime/CL/CLTunerTypes.h"
29 #include "arm_compute/runtime/CL/ICLTuner.h"
30 
31 #include <unordered_map>
32 
33 namespace arm_compute
34 {
35 class ICLKernel;
36 
37 /** Basic implementation of the OpenCL tuner interface */
38 class CLTuner : public ICLTuner
39 {
40 public:
41     /** Constructor
42      *
43      * @param[in] tune_new_kernels Find the optimal local workgroup size for kernels which are not present in the table ?
44      *
45      */
46     CLTuner(bool tune_new_kernels = true);
47 
48     /** Destructor */
49     ~CLTuner() = default;
50 
51     /** Setter for tune_new_kernels option
52      *
53      * @param[in] tune_new_kernels Find the optimal local workgroup size for kernels which are not present in the table ?
54      */
55     void set_tune_new_kernels(bool tune_new_kernels);
56     /** Tune kernels that are not in the LWS table
57      *
58      * @return True if tuning of new kernels is enabled.
59      */
60     bool tune_new_kernels() const;
61 
62     /** Set OpenCL tuner mode
63      *
64      * @param[in] mode Indicates how exhaustive the search for the optimal LWS should be while tuning. Default is Exhaustive mode
65      */
66     void set_tuner_mode(CLTunerMode mode);
67 
68     /** Get the current OpenCL tuner mode
69      *
70      * @return tuner_mode Indicates how exhaustive the search for the optimal LWS should be while tuning
71      */
72     CLTunerMode get_tuner_mode() const;
73 
74     /** Manually add a LWS for a kernel
75      *
76      * @param[in] kernel_id   Unique identifiant of the kernel
77      * @param[in] optimal_lws Optimal local workgroup size to use for the given kernel
78      */
79     void add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal_lws);
80 
81     /** Import LWS table
82      *
83      * @param[in] lws_table The unordered_map container to import
84      */
85     void import_lws_table(const std::unordered_map<std::string, cl::NDRange> &lws_table);
86 
87     /** Give read access to the LWS table
88      *
89      * @return The lws table as unordered_map container
90      */
91     const std::unordered_map<std::string, cl::NDRange> &lws_table() const;
92 
93     /** Set the OpenCL kernel event
94      *
95      * @note The interceptor can use this function to store the event associated to the OpenCL kernel
96      *
97      * @param[in] kernel_event The OpenCL kernel event
98      */
99     void set_cl_kernel_event(cl_event kernel_event);
100 
101     /** clEnqueueNDRangeKernel symbol */
102     std::function<decltype(clEnqueueNDRangeKernel)> real_clEnqueueNDRangeKernel;
103 
104     /** Load the LWS table from file
105      *
106      * @param[in] filename Load the LWS table from this file.(Must exist)
107      */
108     void load_from_file(const std::string &filename);
109 
110     /** Save the content of the LWS table to file
111      *
112      * @param[in] filename Save the LWS table to this file. (Content will be overwritten)
113      */
114     void save_to_file(const std::string &filename) const;
115 
116     // Inherited methods overridden:
117     void tune_kernel_static(ICLKernel &kernel) override;
118     void tune_kernel_dynamic(ICLKernel &kernel) override;
119     void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) override;
120 
121     /** Is the kernel_event set ?
122      *
123      * @return true if the kernel_event is set.
124      */
125     bool kernel_event_is_set() const;
126 
127 private:
128     /** Find optimal LWS using brute-force approach
129      *
130      * @param[in]     kernel  OpenCL kernel to be tuned with LWS
131      * @param[in,out] tensors Tensors for the kernel to operate on
132      *
133      * @return The optimal LWS to use
134      */
135     cl::NDRange find_optimal_lws(ICLKernel &kernel, ITensorPack &tensors);
136 
137     std::unordered_map<std::string, cl::NDRange> _lws_table;
138     cl::Event   _kernel_event;
139     bool        _tune_new_kernels;
140     CLTunerMode _tuner_mode;
141 };
142 } // namespace arm_compute
143 #endif /*ARM_COMPUTE_CLTUNER_H */
144